]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/nilfs2/btree.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
[karo-tx-linux.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
35
36 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
37 {
38         struct nilfs_btree_path *path;
39         int level = NILFS_BTREE_LEVEL_DATA;
40
41         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
42         if (path == NULL)
43                 goto out;
44
45         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
46                 path[level].bp_bh = NULL;
47                 path[level].bp_sib_bh = NULL;
48                 path[level].bp_index = 0;
49                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
50                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
51                 path[level].bp_op = NULL;
52         }
53
54 out:
55         return path;
56 }
57
58 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
59 {
60         int level = NILFS_BTREE_LEVEL_DATA;
61
62         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
63                 brelse(path[level].bp_bh);
64
65         kmem_cache_free(nilfs_btree_path_cache, path);
66 }
67
68 /*
69  * B-tree node operations
70  */
71 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
72                                      __u64 ptr, struct buffer_head **bhp)
73 {
74         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
75         struct buffer_head *bh;
76
77         bh = nilfs_btnode_create_block(btnc, ptr);
78         if (!bh)
79                 return -ENOMEM;
80
81         set_buffer_nilfs_volatile(bh);
82         *bhp = bh;
83         return 0;
84 }
85
86 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
87 {
88         return node->bn_flags;
89 }
90
91 static void
92 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
93 {
94         node->bn_flags = flags;
95 }
96
97 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
98 {
99         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
100 }
101
102 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
103 {
104         return node->bn_level;
105 }
106
107 static void
108 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
109 {
110         node->bn_level = level;
111 }
112
113 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
114 {
115         return le16_to_cpu(node->bn_nchildren);
116 }
117
118 static void
119 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
120 {
121         node->bn_nchildren = cpu_to_le16(nchildren);
122 }
123
124 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
125 {
126         return 1 << btree->b_inode->i_blkbits;
127 }
128
129 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
130 {
131         return btree->b_nchildren_per_block;
132 }
133
134 static __le64 *
135 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
136 {
137         return (__le64 *)((char *)(node + 1) +
138                           (nilfs_btree_node_root(node) ?
139                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
140 }
141
142 static __le64 *
143 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
144 {
145         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
146 }
147
148 static __u64
149 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
150 {
151         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
152 }
153
154 static void
155 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
156 {
157         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
158 }
159
160 static __u64
161 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
162                          int ncmax)
163 {
164         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
165 }
166
167 static void
168 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
169                          int ncmax)
170 {
171         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
172 }
173
174 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
175                                   int level, int nchildren, int ncmax,
176                                   const __u64 *keys, const __u64 *ptrs)
177 {
178         __le64 *dkeys;
179         __le64 *dptrs;
180         int i;
181
182         nilfs_btree_node_set_flags(node, flags);
183         nilfs_btree_node_set_level(node, level);
184         nilfs_btree_node_set_nchildren(node, nchildren);
185
186         dkeys = nilfs_btree_node_dkeys(node);
187         dptrs = nilfs_btree_node_dptrs(node, ncmax);
188         for (i = 0; i < nchildren; i++) {
189                 dkeys[i] = cpu_to_le64(keys[i]);
190                 dptrs[i] = cpu_to_le64(ptrs[i]);
191         }
192 }
193
194 /* Assume the buffer heads corresponding to left and right are locked. */
195 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
196                                        struct nilfs_btree_node *right,
197                                        int n, int lncmax, int rncmax)
198 {
199         __le64 *ldkeys, *rdkeys;
200         __le64 *ldptrs, *rdptrs;
201         int lnchildren, rnchildren;
202
203         ldkeys = nilfs_btree_node_dkeys(left);
204         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
205         lnchildren = nilfs_btree_node_get_nchildren(left);
206
207         rdkeys = nilfs_btree_node_dkeys(right);
208         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
209         rnchildren = nilfs_btree_node_get_nchildren(right);
210
211         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
212         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
213         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
214         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
215
216         lnchildren += n;
217         rnchildren -= n;
218         nilfs_btree_node_set_nchildren(left, lnchildren);
219         nilfs_btree_node_set_nchildren(right, rnchildren);
220 }
221
222 /* Assume that the buffer heads corresponding to left and right are locked. */
223 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
224                                         struct nilfs_btree_node *right,
225                                         int n, int lncmax, int rncmax)
226 {
227         __le64 *ldkeys, *rdkeys;
228         __le64 *ldptrs, *rdptrs;
229         int lnchildren, rnchildren;
230
231         ldkeys = nilfs_btree_node_dkeys(left);
232         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
233         lnchildren = nilfs_btree_node_get_nchildren(left);
234
235         rdkeys = nilfs_btree_node_dkeys(right);
236         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
237         rnchildren = nilfs_btree_node_get_nchildren(right);
238
239         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
240         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
241         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
242         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
243
244         lnchildren -= n;
245         rnchildren += n;
246         nilfs_btree_node_set_nchildren(left, lnchildren);
247         nilfs_btree_node_set_nchildren(right, rnchildren);
248 }
249
250 /* Assume that the buffer head corresponding to node is locked. */
251 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
252                                     __u64 key, __u64 ptr, int ncmax)
253 {
254         __le64 *dkeys;
255         __le64 *dptrs;
256         int nchildren;
257
258         dkeys = nilfs_btree_node_dkeys(node);
259         dptrs = nilfs_btree_node_dptrs(node, ncmax);
260         nchildren = nilfs_btree_node_get_nchildren(node);
261         if (index < nchildren) {
262                 memmove(dkeys + index + 1, dkeys + index,
263                         (nchildren - index) * sizeof(*dkeys));
264                 memmove(dptrs + index + 1, dptrs + index,
265                         (nchildren - index) * sizeof(*dptrs));
266         }
267         dkeys[index] = cpu_to_le64(key);
268         dptrs[index] = cpu_to_le64(ptr);
269         nchildren++;
270         nilfs_btree_node_set_nchildren(node, nchildren);
271 }
272
273 /* Assume that the buffer head corresponding to node is locked. */
274 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
275                                     __u64 *keyp, __u64 *ptrp, int ncmax)
276 {
277         __u64 key;
278         __u64 ptr;
279         __le64 *dkeys;
280         __le64 *dptrs;
281         int nchildren;
282
283         dkeys = nilfs_btree_node_dkeys(node);
284         dptrs = nilfs_btree_node_dptrs(node, ncmax);
285         key = le64_to_cpu(dkeys[index]);
286         ptr = le64_to_cpu(dptrs[index]);
287         nchildren = nilfs_btree_node_get_nchildren(node);
288         if (keyp != NULL)
289                 *keyp = key;
290         if (ptrp != NULL)
291                 *ptrp = ptr;
292
293         if (index < nchildren - 1) {
294                 memmove(dkeys + index, dkeys + index + 1,
295                         (nchildren - index - 1) * sizeof(*dkeys));
296                 memmove(dptrs + index, dptrs + index + 1,
297                         (nchildren - index - 1) * sizeof(*dptrs));
298         }
299         nchildren--;
300         nilfs_btree_node_set_nchildren(node, nchildren);
301 }
302
303 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
304                                    __u64 key, int *indexp)
305 {
306         __u64 nkey;
307         int index, low, high, s;
308
309         /* binary search */
310         low = 0;
311         high = nilfs_btree_node_get_nchildren(node) - 1;
312         index = 0;
313         s = 0;
314         while (low <= high) {
315                 index = (low + high) / 2;
316                 nkey = nilfs_btree_node_get_key(node, index);
317                 if (nkey == key) {
318                         s = 0;
319                         goto out;
320                 } else if (nkey < key) {
321                         low = index + 1;
322                         s = -1;
323                 } else {
324                         high = index - 1;
325                         s = 1;
326                 }
327         }
328
329         /* adjust index */
330         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
331                 if (s > 0 && index > 0)
332                         index--;
333         } else if (s < 0)
334                 index++;
335
336  out:
337         *indexp = index;
338
339         return s == 0;
340 }
341
342 /**
343  * nilfs_btree_node_broken - verify consistency of btree node
344  * @node: btree node block to be examined
345  * @size: node size (in bytes)
346  * @blocknr: block number
347  *
348  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
349  */
350 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
351                                    size_t size, sector_t blocknr)
352 {
353         int level, flags, nchildren;
354         int ret = 0;
355
356         level = nilfs_btree_node_get_level(node);
357         flags = nilfs_btree_node_get_flags(node);
358         nchildren = nilfs_btree_node_get_nchildren(node);
359
360         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
361                      level >= NILFS_BTREE_LEVEL_MAX ||
362                      (flags & NILFS_BTREE_NODE_ROOT) ||
363                      nchildren < 0 ||
364                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
365                 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
366                        "level = %d, flags = 0x%x, nchildren = %d\n",
367                        (unsigned long long)blocknr, level, flags, nchildren);
368                 ret = 1;
369         }
370         return ret;
371 }
372
373 /**
374  * nilfs_btree_root_broken - verify consistency of btree root node
375  * @node: btree root node to be examined
376  * @ino: inode number
377  *
378  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
379  */
380 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
381                                    unsigned long ino)
382 {
383         int level, flags, nchildren;
384         int ret = 0;
385
386         level = nilfs_btree_node_get_level(node);
387         flags = nilfs_btree_node_get_flags(node);
388         nchildren = nilfs_btree_node_get_nchildren(node);
389
390         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
391                      level > NILFS_BTREE_LEVEL_MAX ||
392                      nchildren < 0 ||
393                      nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
394                 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
395                         ino, level, flags, nchildren);
396                 ret = 1;
397         }
398         return ret;
399 }
400
401 int nilfs_btree_broken_node_block(struct buffer_head *bh)
402 {
403         int ret;
404
405         if (buffer_nilfs_checked(bh))
406                 return 0;
407
408         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
409                                        bh->b_size, bh->b_blocknr);
410         if (likely(!ret))
411                 set_buffer_nilfs_checked(bh);
412         return ret;
413 }
414
415 static struct nilfs_btree_node *
416 nilfs_btree_get_root(const struct nilfs_bmap *btree)
417 {
418         return (struct nilfs_btree_node *)btree->b_u.u_data;
419 }
420
421 static struct nilfs_btree_node *
422 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
423 {
424         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
425 }
426
427 static struct nilfs_btree_node *
428 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
429 {
430         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
431 }
432
433 static int nilfs_btree_height(const struct nilfs_bmap *btree)
434 {
435         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
436 }
437
438 static struct nilfs_btree_node *
439 nilfs_btree_get_node(const struct nilfs_bmap *btree,
440                      const struct nilfs_btree_path *path,
441                      int level, int *ncmaxp)
442 {
443         struct nilfs_btree_node *node;
444
445         if (level == nilfs_btree_height(btree) - 1) {
446                 node = nilfs_btree_get_root(btree);
447                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
448         } else {
449                 node = nilfs_btree_get_nonroot_node(path, level);
450                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
451         }
452         return node;
453 }
454
455 static int
456 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
457 {
458         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
459                 dump_stack();
460                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
461                        nilfs_btree_node_get_level(node), level);
462                 return 1;
463         }
464         return 0;
465 }
466
467 struct nilfs_btree_readahead_info {
468         struct nilfs_btree_node *node;  /* parent node */
469         int max_ra_blocks;              /* max nof blocks to read ahead */
470         int index;                      /* current index on the parent node */
471         int ncmax;                      /* nof children in the parent node */
472 };
473
474 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
475                                    struct buffer_head **bhp,
476                                    const struct nilfs_btree_readahead_info *ra)
477 {
478         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
479         struct buffer_head *bh, *ra_bh;
480         sector_t submit_ptr = 0;
481         int ret;
482
483         ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
484         if (ret) {
485                 if (ret != -EEXIST)
486                         return ret;
487                 goto out_check;
488         }
489
490         if (ra) {
491                 int i, n;
492                 __u64 ptr2;
493
494                 /* read ahead sibling nodes */
495                 for (n = ra->max_ra_blocks, i = ra->index + 1;
496                      n > 0 && i < ra->ncmax; n--, i++) {
497                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
498
499                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
500                                                         &ra_bh, &submit_ptr);
501                         if (likely(!ret || ret == -EEXIST))
502                                 brelse(ra_bh);
503                         else if (ret != -EBUSY)
504                                 break;
505                         if (!buffer_locked(bh))
506                                 goto out_no_wait;
507                 }
508         }
509
510         wait_on_buffer(bh);
511
512  out_no_wait:
513         if (!buffer_uptodate(bh)) {
514                 brelse(bh);
515                 return -EIO;
516         }
517
518  out_check:
519         if (nilfs_btree_broken_node_block(bh)) {
520                 clear_buffer_uptodate(bh);
521                 brelse(bh);
522                 return -EINVAL;
523         }
524
525         *bhp = bh;
526         return 0;
527 }
528
529 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
530                                    struct buffer_head **bhp)
531 {
532         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
533 }
534
535 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
536                                  struct nilfs_btree_path *path,
537                                  __u64 key, __u64 *ptrp, int minlevel,
538                                  int readahead)
539 {
540         struct nilfs_btree_node *node;
541         struct nilfs_btree_readahead_info p, *ra;
542         __u64 ptr;
543         int level, index, found, ncmax, ret;
544
545         node = nilfs_btree_get_root(btree);
546         level = nilfs_btree_node_get_level(node);
547         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
548                 return -ENOENT;
549
550         found = nilfs_btree_node_lookup(node, key, &index);
551         ptr = nilfs_btree_node_get_ptr(node, index,
552                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
553         path[level].bp_bh = NULL;
554         path[level].bp_index = index;
555
556         ncmax = nilfs_btree_nchildren_per_block(btree);
557
558         while (--level >= minlevel) {
559                 ra = NULL;
560                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
561                         p.node = nilfs_btree_get_node(btree, path, level + 1,
562                                                       &p.ncmax);
563                         p.index = index;
564                         p.max_ra_blocks = 7;
565                         ra = &p;
566                 }
567                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
568                                               ra);
569                 if (ret < 0)
570                         return ret;
571
572                 node = nilfs_btree_get_nonroot_node(path, level);
573                 if (nilfs_btree_bad_node(node, level))
574                         return -EINVAL;
575                 if (!found)
576                         found = nilfs_btree_node_lookup(node, key, &index);
577                 else
578                         index = 0;
579                 if (index < ncmax) {
580                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
581                 } else {
582                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
583                         /* insert */
584                         ptr = NILFS_BMAP_INVALID_PTR;
585                 }
586                 path[level].bp_index = index;
587         }
588         if (!found)
589                 return -ENOENT;
590
591         if (ptrp != NULL)
592                 *ptrp = ptr;
593
594         return 0;
595 }
596
597 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
598                                       struct nilfs_btree_path *path,
599                                       __u64 *keyp, __u64 *ptrp)
600 {
601         struct nilfs_btree_node *node;
602         __u64 ptr;
603         int index, level, ncmax, ret;
604
605         node = nilfs_btree_get_root(btree);
606         index = nilfs_btree_node_get_nchildren(node) - 1;
607         if (index < 0)
608                 return -ENOENT;
609         level = nilfs_btree_node_get_level(node);
610         ptr = nilfs_btree_node_get_ptr(node, index,
611                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
612         path[level].bp_bh = NULL;
613         path[level].bp_index = index;
614         ncmax = nilfs_btree_nchildren_per_block(btree);
615
616         for (level--; level > 0; level--) {
617                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
618                 if (ret < 0)
619                         return ret;
620                 node = nilfs_btree_get_nonroot_node(path, level);
621                 if (nilfs_btree_bad_node(node, level))
622                         return -EINVAL;
623                 index = nilfs_btree_node_get_nchildren(node) - 1;
624                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
625                 path[level].bp_index = index;
626         }
627
628         if (keyp != NULL)
629                 *keyp = nilfs_btree_node_get_key(node, index);
630         if (ptrp != NULL)
631                 *ptrp = ptr;
632
633         return 0;
634 }
635
636 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
637                               __u64 key, int level, __u64 *ptrp)
638 {
639         struct nilfs_btree_path *path;
640         int ret;
641
642         path = nilfs_btree_alloc_path();
643         if (path == NULL)
644                 return -ENOMEM;
645
646         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
647
648         nilfs_btree_free_path(path);
649
650         return ret;
651 }
652
653 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
654                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
655 {
656         struct nilfs_btree_path *path;
657         struct nilfs_btree_node *node;
658         struct inode *dat = NULL;
659         __u64 ptr, ptr2;
660         sector_t blocknr;
661         int level = NILFS_BTREE_LEVEL_NODE_MIN;
662         int ret, cnt, index, maxlevel, ncmax;
663         struct nilfs_btree_readahead_info p;
664
665         path = nilfs_btree_alloc_path();
666         if (path == NULL)
667                 return -ENOMEM;
668
669         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
670         if (ret < 0)
671                 goto out;
672
673         if (NILFS_BMAP_USE_VBN(btree)) {
674                 dat = nilfs_bmap_get_dat(btree);
675                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
676                 if (ret < 0)
677                         goto out;
678                 ptr = blocknr;
679         }
680         cnt = 1;
681         if (cnt == maxblocks)
682                 goto end;
683
684         maxlevel = nilfs_btree_height(btree) - 1;
685         node = nilfs_btree_get_node(btree, path, level, &ncmax);
686         index = path[level].bp_index + 1;
687         for (;;) {
688                 while (index < nilfs_btree_node_get_nchildren(node)) {
689                         if (nilfs_btree_node_get_key(node, index) !=
690                             key + cnt)
691                                 goto end;
692                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
693                         if (dat) {
694                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
695                                 if (ret < 0)
696                                         goto out;
697                                 ptr2 = blocknr;
698                         }
699                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
700                                 goto end;
701                         index++;
702                         continue;
703                 }
704                 if (level == maxlevel)
705                         break;
706
707                 /* look-up right sibling node */
708                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
709                 p.index = path[level + 1].bp_index + 1;
710                 p.max_ra_blocks = 7;
711                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
712                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
713                         break;
714                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
715                 path[level + 1].bp_index = p.index;
716
717                 brelse(path[level].bp_bh);
718                 path[level].bp_bh = NULL;
719
720                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
721                                               &p);
722                 if (ret < 0)
723                         goto out;
724                 node = nilfs_btree_get_nonroot_node(path, level);
725                 ncmax = nilfs_btree_nchildren_per_block(btree);
726                 index = 0;
727                 path[level].bp_index = index;
728         }
729  end:
730         *ptrp = ptr;
731         ret = cnt;
732  out:
733         nilfs_btree_free_path(path);
734         return ret;
735 }
736
737 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
738                                     struct nilfs_btree_path *path,
739                                     int level, __u64 key)
740 {
741         if (level < nilfs_btree_height(btree) - 1) {
742                 do {
743                         nilfs_btree_node_set_key(
744                                 nilfs_btree_get_nonroot_node(path, level),
745                                 path[level].bp_index, key);
746                         if (!buffer_dirty(path[level].bp_bh))
747                                 mark_buffer_dirty(path[level].bp_bh);
748                 } while ((path[level].bp_index == 0) &&
749                          (++level < nilfs_btree_height(btree) - 1));
750         }
751
752         /* root */
753         if (level == nilfs_btree_height(btree) - 1) {
754                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
755                                          path[level].bp_index, key);
756         }
757 }
758
759 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
760                                   struct nilfs_btree_path *path,
761                                   int level, __u64 *keyp, __u64 *ptrp)
762 {
763         struct nilfs_btree_node *node;
764         int ncblk;
765
766         if (level < nilfs_btree_height(btree) - 1) {
767                 node = nilfs_btree_get_nonroot_node(path, level);
768                 ncblk = nilfs_btree_nchildren_per_block(btree);
769                 nilfs_btree_node_insert(node, path[level].bp_index,
770                                         *keyp, *ptrp, ncblk);
771                 if (!buffer_dirty(path[level].bp_bh))
772                         mark_buffer_dirty(path[level].bp_bh);
773
774                 if (path[level].bp_index == 0)
775                         nilfs_btree_promote_key(btree, path, level + 1,
776                                                 nilfs_btree_node_get_key(node,
777                                                                          0));
778         } else {
779                 node = nilfs_btree_get_root(btree);
780                 nilfs_btree_node_insert(node, path[level].bp_index,
781                                         *keyp, *ptrp,
782                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
783         }
784 }
785
786 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
787                                    struct nilfs_btree_path *path,
788                                    int level, __u64 *keyp, __u64 *ptrp)
789 {
790         struct nilfs_btree_node *node, *left;
791         int nchildren, lnchildren, n, move, ncblk;
792
793         node = nilfs_btree_get_nonroot_node(path, level);
794         left = nilfs_btree_get_sib_node(path, level);
795         nchildren = nilfs_btree_node_get_nchildren(node);
796         lnchildren = nilfs_btree_node_get_nchildren(left);
797         ncblk = nilfs_btree_nchildren_per_block(btree);
798         move = 0;
799
800         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
801         if (n > path[level].bp_index) {
802                 /* move insert point */
803                 n--;
804                 move = 1;
805         }
806
807         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
808
809         if (!buffer_dirty(path[level].bp_bh))
810                 mark_buffer_dirty(path[level].bp_bh);
811         if (!buffer_dirty(path[level].bp_sib_bh))
812                 mark_buffer_dirty(path[level].bp_sib_bh);
813
814         nilfs_btree_promote_key(btree, path, level + 1,
815                                 nilfs_btree_node_get_key(node, 0));
816
817         if (move) {
818                 brelse(path[level].bp_bh);
819                 path[level].bp_bh = path[level].bp_sib_bh;
820                 path[level].bp_sib_bh = NULL;
821                 path[level].bp_index += lnchildren;
822                 path[level + 1].bp_index--;
823         } else {
824                 brelse(path[level].bp_sib_bh);
825                 path[level].bp_sib_bh = NULL;
826                 path[level].bp_index -= n;
827         }
828
829         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
830 }
831
832 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
833                                     struct nilfs_btree_path *path,
834                                     int level, __u64 *keyp, __u64 *ptrp)
835 {
836         struct nilfs_btree_node *node, *right;
837         int nchildren, rnchildren, n, move, ncblk;
838
839         node = nilfs_btree_get_nonroot_node(path, level);
840         right = nilfs_btree_get_sib_node(path, level);
841         nchildren = nilfs_btree_node_get_nchildren(node);
842         rnchildren = nilfs_btree_node_get_nchildren(right);
843         ncblk = nilfs_btree_nchildren_per_block(btree);
844         move = 0;
845
846         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
847         if (n > nchildren - path[level].bp_index) {
848                 /* move insert point */
849                 n--;
850                 move = 1;
851         }
852
853         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
854
855         if (!buffer_dirty(path[level].bp_bh))
856                 mark_buffer_dirty(path[level].bp_bh);
857         if (!buffer_dirty(path[level].bp_sib_bh))
858                 mark_buffer_dirty(path[level].bp_sib_bh);
859
860         path[level + 1].bp_index++;
861         nilfs_btree_promote_key(btree, path, level + 1,
862                                 nilfs_btree_node_get_key(right, 0));
863         path[level + 1].bp_index--;
864
865         if (move) {
866                 brelse(path[level].bp_bh);
867                 path[level].bp_bh = path[level].bp_sib_bh;
868                 path[level].bp_sib_bh = NULL;
869                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
870                 path[level + 1].bp_index++;
871         } else {
872                 brelse(path[level].bp_sib_bh);
873                 path[level].bp_sib_bh = NULL;
874         }
875
876         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
877 }
878
879 static void nilfs_btree_split(struct nilfs_bmap *btree,
880                               struct nilfs_btree_path *path,
881                               int level, __u64 *keyp, __u64 *ptrp)
882 {
883         struct nilfs_btree_node *node, *right;
884         __u64 newkey;
885         __u64 newptr;
886         int nchildren, n, move, ncblk;
887
888         node = nilfs_btree_get_nonroot_node(path, level);
889         right = nilfs_btree_get_sib_node(path, level);
890         nchildren = nilfs_btree_node_get_nchildren(node);
891         ncblk = nilfs_btree_nchildren_per_block(btree);
892         move = 0;
893
894         n = (nchildren + 1) / 2;
895         if (n > nchildren - path[level].bp_index) {
896                 n--;
897                 move = 1;
898         }
899
900         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
901
902         if (!buffer_dirty(path[level].bp_bh))
903                 mark_buffer_dirty(path[level].bp_bh);
904         if (!buffer_dirty(path[level].bp_sib_bh))
905                 mark_buffer_dirty(path[level].bp_sib_bh);
906
907         newkey = nilfs_btree_node_get_key(right, 0);
908         newptr = path[level].bp_newreq.bpr_ptr;
909
910         if (move) {
911                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
912                 nilfs_btree_node_insert(right, path[level].bp_index,
913                                         *keyp, *ptrp, ncblk);
914
915                 *keyp = nilfs_btree_node_get_key(right, 0);
916                 *ptrp = path[level].bp_newreq.bpr_ptr;
917
918                 brelse(path[level].bp_bh);
919                 path[level].bp_bh = path[level].bp_sib_bh;
920                 path[level].bp_sib_bh = NULL;
921         } else {
922                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
923
924                 *keyp = nilfs_btree_node_get_key(right, 0);
925                 *ptrp = path[level].bp_newreq.bpr_ptr;
926
927                 brelse(path[level].bp_sib_bh);
928                 path[level].bp_sib_bh = NULL;
929         }
930
931         path[level + 1].bp_index++;
932 }
933
934 static void nilfs_btree_grow(struct nilfs_bmap *btree,
935                              struct nilfs_btree_path *path,
936                              int level, __u64 *keyp, __u64 *ptrp)
937 {
938         struct nilfs_btree_node *root, *child;
939         int n, ncblk;
940
941         root = nilfs_btree_get_root(btree);
942         child = nilfs_btree_get_sib_node(path, level);
943         ncblk = nilfs_btree_nchildren_per_block(btree);
944
945         n = nilfs_btree_node_get_nchildren(root);
946
947         nilfs_btree_node_move_right(root, child, n,
948                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
949         nilfs_btree_node_set_level(root, level + 1);
950
951         if (!buffer_dirty(path[level].bp_sib_bh))
952                 mark_buffer_dirty(path[level].bp_sib_bh);
953
954         path[level].bp_bh = path[level].bp_sib_bh;
955         path[level].bp_sib_bh = NULL;
956
957         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
958
959         *keyp = nilfs_btree_node_get_key(child, 0);
960         *ptrp = path[level].bp_newreq.bpr_ptr;
961 }
962
963 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
964                                    const struct nilfs_btree_path *path)
965 {
966         struct nilfs_btree_node *node;
967         int level, ncmax;
968
969         if (path == NULL)
970                 return NILFS_BMAP_INVALID_PTR;
971
972         /* left sibling */
973         level = NILFS_BTREE_LEVEL_NODE_MIN;
974         if (path[level].bp_index > 0) {
975                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
976                 return nilfs_btree_node_get_ptr(node,
977                                                 path[level].bp_index - 1,
978                                                 ncmax);
979         }
980
981         /* parent */
982         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
983         if (level <= nilfs_btree_height(btree) - 1) {
984                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
985                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
986                                                 ncmax);
987         }
988
989         return NILFS_BMAP_INVALID_PTR;
990 }
991
992 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
993                                        const struct nilfs_btree_path *path,
994                                        __u64 key)
995 {
996         __u64 ptr;
997
998         ptr = nilfs_bmap_find_target_seq(btree, key);
999         if (ptr != NILFS_BMAP_INVALID_PTR)
1000                 /* sequential access */
1001                 return ptr;
1002         else {
1003                 ptr = nilfs_btree_find_near(btree, path);
1004                 if (ptr != NILFS_BMAP_INVALID_PTR)
1005                         /* near */
1006                         return ptr;
1007         }
1008         /* block group */
1009         return nilfs_bmap_find_target_in_group(btree);
1010 }
1011
1012 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1013                                       struct nilfs_btree_path *path,
1014                                       int *levelp, __u64 key, __u64 ptr,
1015                                       struct nilfs_bmap_stats *stats)
1016 {
1017         struct buffer_head *bh;
1018         struct nilfs_btree_node *node, *parent, *sib;
1019         __u64 sibptr;
1020         int pindex, level, ncmax, ncblk, ret;
1021         struct inode *dat = NULL;
1022
1023         stats->bs_nblocks = 0;
1024         level = NILFS_BTREE_LEVEL_DATA;
1025
1026         /* allocate a new ptr for data block */
1027         if (NILFS_BMAP_USE_VBN(btree)) {
1028                 path[level].bp_newreq.bpr_ptr =
1029                         nilfs_btree_find_target_v(btree, path, key);
1030                 dat = nilfs_bmap_get_dat(btree);
1031         }
1032
1033         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1034         if (ret < 0)
1035                 goto err_out_data;
1036
1037         ncblk = nilfs_btree_nchildren_per_block(btree);
1038
1039         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1040              level < nilfs_btree_height(btree) - 1;
1041              level++) {
1042                 node = nilfs_btree_get_nonroot_node(path, level);
1043                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1044                         path[level].bp_op = nilfs_btree_do_insert;
1045                         stats->bs_nblocks++;
1046                         goto out;
1047                 }
1048
1049                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1050                 pindex = path[level + 1].bp_index;
1051
1052                 /* left sibling */
1053                 if (pindex > 0) {
1054                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1055                                                           ncmax);
1056                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1057                         if (ret < 0)
1058                                 goto err_out_child_node;
1059                         sib = (struct nilfs_btree_node *)bh->b_data;
1060                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1061                                 path[level].bp_sib_bh = bh;
1062                                 path[level].bp_op = nilfs_btree_carry_left;
1063                                 stats->bs_nblocks++;
1064                                 goto out;
1065                         } else {
1066                                 brelse(bh);
1067                         }
1068                 }
1069
1070                 /* right sibling */
1071                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1072                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1073                                                           ncmax);
1074                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1075                         if (ret < 0)
1076                                 goto err_out_child_node;
1077                         sib = (struct nilfs_btree_node *)bh->b_data;
1078                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1079                                 path[level].bp_sib_bh = bh;
1080                                 path[level].bp_op = nilfs_btree_carry_right;
1081                                 stats->bs_nblocks++;
1082                                 goto out;
1083                         } else {
1084                                 brelse(bh);
1085                         }
1086                 }
1087
1088                 /* split */
1089                 path[level].bp_newreq.bpr_ptr =
1090                         path[level - 1].bp_newreq.bpr_ptr + 1;
1091                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1092                                                    &path[level].bp_newreq, dat);
1093                 if (ret < 0)
1094                         goto err_out_child_node;
1095                 ret = nilfs_btree_get_new_block(btree,
1096                                                 path[level].bp_newreq.bpr_ptr,
1097                                                 &bh);
1098                 if (ret < 0)
1099                         goto err_out_curr_node;
1100
1101                 stats->bs_nblocks++;
1102
1103                 sib = (struct nilfs_btree_node *)bh->b_data;
1104                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1105                 path[level].bp_sib_bh = bh;
1106                 path[level].bp_op = nilfs_btree_split;
1107         }
1108
1109         /* root */
1110         node = nilfs_btree_get_root(btree);
1111         if (nilfs_btree_node_get_nchildren(node) <
1112             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1113                 path[level].bp_op = nilfs_btree_do_insert;
1114                 stats->bs_nblocks++;
1115                 goto out;
1116         }
1117
1118         /* grow */
1119         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1120         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1121         if (ret < 0)
1122                 goto err_out_child_node;
1123         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1124                                         &bh);
1125         if (ret < 0)
1126                 goto err_out_curr_node;
1127
1128         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1129                               0, level, 0, ncblk, NULL, NULL);
1130         path[level].bp_sib_bh = bh;
1131         path[level].bp_op = nilfs_btree_grow;
1132
1133         level++;
1134         path[level].bp_op = nilfs_btree_do_insert;
1135
1136         /* a newly-created node block and a data block are added */
1137         stats->bs_nblocks += 2;
1138
1139         /* success */
1140  out:
1141         *levelp = level;
1142         return ret;
1143
1144         /* error */
1145  err_out_curr_node:
1146         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1147  err_out_child_node:
1148         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1149                 nilfs_btnode_delete(path[level].bp_sib_bh);
1150                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1151
1152         }
1153
1154         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1155  err_out_data:
1156         *levelp = level;
1157         stats->bs_nblocks = 0;
1158         return ret;
1159 }
1160
1161 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1162                                       struct nilfs_btree_path *path,
1163                                       int maxlevel, __u64 key, __u64 ptr)
1164 {
1165         struct inode *dat = NULL;
1166         int level;
1167
1168         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1169         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1170         if (NILFS_BMAP_USE_VBN(btree)) {
1171                 nilfs_bmap_set_target_v(btree, key, ptr);
1172                 dat = nilfs_bmap_get_dat(btree);
1173         }
1174
1175         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1176                 nilfs_bmap_commit_alloc_ptr(btree,
1177                                             &path[level - 1].bp_newreq, dat);
1178                 path[level].bp_op(btree, path, level, &key, &ptr);
1179         }
1180
1181         if (!nilfs_bmap_dirty(btree))
1182                 nilfs_bmap_set_dirty(btree);
1183 }
1184
1185 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1186 {
1187         struct nilfs_btree_path *path;
1188         struct nilfs_bmap_stats stats;
1189         int level, ret;
1190
1191         path = nilfs_btree_alloc_path();
1192         if (path == NULL)
1193                 return -ENOMEM;
1194
1195         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1196                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1197         if (ret != -ENOENT) {
1198                 if (ret == 0)
1199                         ret = -EEXIST;
1200                 goto out;
1201         }
1202
1203         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1204         if (ret < 0)
1205                 goto out;
1206         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1207         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1208
1209  out:
1210         nilfs_btree_free_path(path);
1211         return ret;
1212 }
1213
1214 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1215                                   struct nilfs_btree_path *path,
1216                                   int level, __u64 *keyp, __u64 *ptrp)
1217 {
1218         struct nilfs_btree_node *node;
1219         int ncblk;
1220
1221         if (level < nilfs_btree_height(btree) - 1) {
1222                 node = nilfs_btree_get_nonroot_node(path, level);
1223                 ncblk = nilfs_btree_nchildren_per_block(btree);
1224                 nilfs_btree_node_delete(node, path[level].bp_index,
1225                                         keyp, ptrp, ncblk);
1226                 if (!buffer_dirty(path[level].bp_bh))
1227                         mark_buffer_dirty(path[level].bp_bh);
1228                 if (path[level].bp_index == 0)
1229                         nilfs_btree_promote_key(btree, path, level + 1,
1230                                 nilfs_btree_node_get_key(node, 0));
1231         } else {
1232                 node = nilfs_btree_get_root(btree);
1233                 nilfs_btree_node_delete(node, path[level].bp_index,
1234                                         keyp, ptrp,
1235                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1236         }
1237 }
1238
1239 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1240                                     struct nilfs_btree_path *path,
1241                                     int level, __u64 *keyp, __u64 *ptrp)
1242 {
1243         struct nilfs_btree_node *node, *left;
1244         int nchildren, lnchildren, n, ncblk;
1245
1246         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1247
1248         node = nilfs_btree_get_nonroot_node(path, level);
1249         left = nilfs_btree_get_sib_node(path, level);
1250         nchildren = nilfs_btree_node_get_nchildren(node);
1251         lnchildren = nilfs_btree_node_get_nchildren(left);
1252         ncblk = nilfs_btree_nchildren_per_block(btree);
1253
1254         n = (nchildren + lnchildren) / 2 - nchildren;
1255
1256         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1257
1258         if (!buffer_dirty(path[level].bp_bh))
1259                 mark_buffer_dirty(path[level].bp_bh);
1260         if (!buffer_dirty(path[level].bp_sib_bh))
1261                 mark_buffer_dirty(path[level].bp_sib_bh);
1262
1263         nilfs_btree_promote_key(btree, path, level + 1,
1264                                 nilfs_btree_node_get_key(node, 0));
1265
1266         brelse(path[level].bp_sib_bh);
1267         path[level].bp_sib_bh = NULL;
1268         path[level].bp_index += n;
1269 }
1270
1271 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1272                                      struct nilfs_btree_path *path,
1273                                      int level, __u64 *keyp, __u64 *ptrp)
1274 {
1275         struct nilfs_btree_node *node, *right;
1276         int nchildren, rnchildren, n, ncblk;
1277
1278         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1279
1280         node = nilfs_btree_get_nonroot_node(path, level);
1281         right = nilfs_btree_get_sib_node(path, level);
1282         nchildren = nilfs_btree_node_get_nchildren(node);
1283         rnchildren = nilfs_btree_node_get_nchildren(right);
1284         ncblk = nilfs_btree_nchildren_per_block(btree);
1285
1286         n = (nchildren + rnchildren) / 2 - nchildren;
1287
1288         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1289
1290         if (!buffer_dirty(path[level].bp_bh))
1291                 mark_buffer_dirty(path[level].bp_bh);
1292         if (!buffer_dirty(path[level].bp_sib_bh))
1293                 mark_buffer_dirty(path[level].bp_sib_bh);
1294
1295         path[level + 1].bp_index++;
1296         nilfs_btree_promote_key(btree, path, level + 1,
1297                                 nilfs_btree_node_get_key(right, 0));
1298         path[level + 1].bp_index--;
1299
1300         brelse(path[level].bp_sib_bh);
1301         path[level].bp_sib_bh = NULL;
1302 }
1303
1304 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1305                                     struct nilfs_btree_path *path,
1306                                     int level, __u64 *keyp, __u64 *ptrp)
1307 {
1308         struct nilfs_btree_node *node, *left;
1309         int n, ncblk;
1310
1311         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1312
1313         node = nilfs_btree_get_nonroot_node(path, level);
1314         left = nilfs_btree_get_sib_node(path, level);
1315         ncblk = nilfs_btree_nchildren_per_block(btree);
1316
1317         n = nilfs_btree_node_get_nchildren(node);
1318
1319         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1320
1321         if (!buffer_dirty(path[level].bp_sib_bh))
1322                 mark_buffer_dirty(path[level].bp_sib_bh);
1323
1324         nilfs_btnode_delete(path[level].bp_bh);
1325         path[level].bp_bh = path[level].bp_sib_bh;
1326         path[level].bp_sib_bh = NULL;
1327         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1328 }
1329
1330 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1331                                      struct nilfs_btree_path *path,
1332                                      int level, __u64 *keyp, __u64 *ptrp)
1333 {
1334         struct nilfs_btree_node *node, *right;
1335         int n, ncblk;
1336
1337         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1338
1339         node = nilfs_btree_get_nonroot_node(path, level);
1340         right = nilfs_btree_get_sib_node(path, level);
1341         ncblk = nilfs_btree_nchildren_per_block(btree);
1342
1343         n = nilfs_btree_node_get_nchildren(right);
1344
1345         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1346
1347         if (!buffer_dirty(path[level].bp_bh))
1348                 mark_buffer_dirty(path[level].bp_bh);
1349
1350         nilfs_btnode_delete(path[level].bp_sib_bh);
1351         path[level].bp_sib_bh = NULL;
1352         path[level + 1].bp_index++;
1353 }
1354
1355 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1356                                struct nilfs_btree_path *path,
1357                                int level, __u64 *keyp, __u64 *ptrp)
1358 {
1359         struct nilfs_btree_node *root, *child;
1360         int n, ncblk;
1361
1362         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1363
1364         root = nilfs_btree_get_root(btree);
1365         child = nilfs_btree_get_nonroot_node(path, level);
1366         ncblk = nilfs_btree_nchildren_per_block(btree);
1367
1368         nilfs_btree_node_delete(root, 0, NULL, NULL,
1369                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1370         nilfs_btree_node_set_level(root, level);
1371         n = nilfs_btree_node_get_nchildren(child);
1372         nilfs_btree_node_move_left(root, child, n,
1373                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1374
1375         nilfs_btnode_delete(path[level].bp_bh);
1376         path[level].bp_bh = NULL;
1377 }
1378
1379 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1380                             struct nilfs_btree_path *path,
1381                             int level, __u64 *keyp, __u64 *ptrp)
1382 {
1383 }
1384
1385 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1386                                       struct nilfs_btree_path *path,
1387                                       int *levelp,
1388                                       struct nilfs_bmap_stats *stats,
1389                                       struct inode *dat)
1390 {
1391         struct buffer_head *bh;
1392         struct nilfs_btree_node *node, *parent, *sib;
1393         __u64 sibptr;
1394         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1395
1396         ret = 0;
1397         stats->bs_nblocks = 0;
1398         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1399         ncblk = nilfs_btree_nchildren_per_block(btree);
1400
1401         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1402              level < nilfs_btree_height(btree) - 1;
1403              level++) {
1404                 node = nilfs_btree_get_nonroot_node(path, level);
1405                 path[level].bp_oldreq.bpr_ptr =
1406                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1407                 ret = nilfs_bmap_prepare_end_ptr(btree,
1408                                                  &path[level].bp_oldreq, dat);
1409                 if (ret < 0)
1410                         goto err_out_child_node;
1411
1412                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1413                         path[level].bp_op = nilfs_btree_do_delete;
1414                         stats->bs_nblocks++;
1415                         goto out;
1416                 }
1417
1418                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1419                 pindex = path[level + 1].bp_index;
1420                 dindex = pindex;
1421
1422                 if (pindex > 0) {
1423                         /* left sibling */
1424                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1425                                                           ncmax);
1426                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1427                         if (ret < 0)
1428                                 goto err_out_curr_node;
1429                         sib = (struct nilfs_btree_node *)bh->b_data;
1430                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1431                                 path[level].bp_sib_bh = bh;
1432                                 path[level].bp_op = nilfs_btree_borrow_left;
1433                                 stats->bs_nblocks++;
1434                                 goto out;
1435                         } else {
1436                                 path[level].bp_sib_bh = bh;
1437                                 path[level].bp_op = nilfs_btree_concat_left;
1438                                 stats->bs_nblocks++;
1439                                 /* continue; */
1440                         }
1441                 } else if (pindex <
1442                            nilfs_btree_node_get_nchildren(parent) - 1) {
1443                         /* right sibling */
1444                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1445                                                           ncmax);
1446                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1447                         if (ret < 0)
1448                                 goto err_out_curr_node;
1449                         sib = (struct nilfs_btree_node *)bh->b_data;
1450                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1451                                 path[level].bp_sib_bh = bh;
1452                                 path[level].bp_op = nilfs_btree_borrow_right;
1453                                 stats->bs_nblocks++;
1454                                 goto out;
1455                         } else {
1456                                 path[level].bp_sib_bh = bh;
1457                                 path[level].bp_op = nilfs_btree_concat_right;
1458                                 stats->bs_nblocks++;
1459                                 /*
1460                                  * When merging right sibling node
1461                                  * into the current node, pointer to
1462                                  * the right sibling node must be
1463                                  * terminated instead.  The adjustment
1464                                  * below is required for that.
1465                                  */
1466                                 dindex = pindex + 1;
1467                                 /* continue; */
1468                         }
1469                 } else {
1470                         /* no siblings */
1471                         /* the only child of the root node */
1472                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1473                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1474                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1475                                 path[level].bp_op = nilfs_btree_shrink;
1476                                 stats->bs_nblocks += 2;
1477                                 level++;
1478                                 path[level].bp_op = nilfs_btree_nop;
1479                                 goto shrink_root_child;
1480                         } else {
1481                                 path[level].bp_op = nilfs_btree_do_delete;
1482                                 stats->bs_nblocks++;
1483                                 goto out;
1484                         }
1485                 }
1486         }
1487
1488         /* child of the root node is deleted */
1489         path[level].bp_op = nilfs_btree_do_delete;
1490         stats->bs_nblocks++;
1491
1492 shrink_root_child:
1493         node = nilfs_btree_get_root(btree);
1494         path[level].bp_oldreq.bpr_ptr =
1495                 nilfs_btree_node_get_ptr(node, dindex,
1496                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1497
1498         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1499         if (ret < 0)
1500                 goto err_out_child_node;
1501
1502         /* success */
1503  out:
1504         *levelp = level;
1505         return ret;
1506
1507         /* error */
1508  err_out_curr_node:
1509         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1510  err_out_child_node:
1511         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1512                 brelse(path[level].bp_sib_bh);
1513                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1514         }
1515         *levelp = level;
1516         stats->bs_nblocks = 0;
1517         return ret;
1518 }
1519
1520 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1521                                       struct nilfs_btree_path *path,
1522                                       int maxlevel, struct inode *dat)
1523 {
1524         int level;
1525
1526         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1527                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1528                 path[level].bp_op(btree, path, level, NULL, NULL);
1529         }
1530
1531         if (!nilfs_bmap_dirty(btree))
1532                 nilfs_bmap_set_dirty(btree);
1533 }
1534
1535 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1536
1537 {
1538         struct nilfs_btree_path *path;
1539         struct nilfs_bmap_stats stats;
1540         struct inode *dat;
1541         int level, ret;
1542
1543         path = nilfs_btree_alloc_path();
1544         if (path == NULL)
1545                 return -ENOMEM;
1546
1547         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1548                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1549         if (ret < 0)
1550                 goto out;
1551
1552
1553         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1554
1555         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1556         if (ret < 0)
1557                 goto out;
1558         nilfs_btree_commit_delete(btree, path, level, dat);
1559         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1560
1561 out:
1562         nilfs_btree_free_path(path);
1563         return ret;
1564 }
1565
1566 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1567 {
1568         struct nilfs_btree_path *path;
1569         int ret;
1570
1571         path = nilfs_btree_alloc_path();
1572         if (path == NULL)
1573                 return -ENOMEM;
1574
1575         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1576
1577         nilfs_btree_free_path(path);
1578
1579         return ret;
1580 }
1581
1582 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1583 {
1584         struct buffer_head *bh;
1585         struct nilfs_btree_node *root, *node;
1586         __u64 maxkey, nextmaxkey;
1587         __u64 ptr;
1588         int nchildren, ret;
1589
1590         root = nilfs_btree_get_root(btree);
1591         switch (nilfs_btree_height(btree)) {
1592         case 2:
1593                 bh = NULL;
1594                 node = root;
1595                 break;
1596         case 3:
1597                 nchildren = nilfs_btree_node_get_nchildren(root);
1598                 if (nchildren > 1)
1599                         return 0;
1600                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1601                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1602                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1603                 if (ret < 0)
1604                         return ret;
1605                 node = (struct nilfs_btree_node *)bh->b_data;
1606                 break;
1607         default:
1608                 return 0;
1609         }
1610
1611         nchildren = nilfs_btree_node_get_nchildren(node);
1612         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1613         nextmaxkey = (nchildren > 1) ?
1614                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1615         if (bh != NULL)
1616                 brelse(bh);
1617
1618         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1619 }
1620
1621 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1622                                    __u64 *keys, __u64 *ptrs, int nitems)
1623 {
1624         struct buffer_head *bh;
1625         struct nilfs_btree_node *node, *root;
1626         __le64 *dkeys;
1627         __le64 *dptrs;
1628         __u64 ptr;
1629         int nchildren, ncmax, i, ret;
1630
1631         root = nilfs_btree_get_root(btree);
1632         switch (nilfs_btree_height(btree)) {
1633         case 2:
1634                 bh = NULL;
1635                 node = root;
1636                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1637                 break;
1638         case 3:
1639                 nchildren = nilfs_btree_node_get_nchildren(root);
1640                 WARN_ON(nchildren > 1);
1641                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1642                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1643                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1644                 if (ret < 0)
1645                         return ret;
1646                 node = (struct nilfs_btree_node *)bh->b_data;
1647                 ncmax = nilfs_btree_nchildren_per_block(btree);
1648                 break;
1649         default:
1650                 node = NULL;
1651                 return -EINVAL;
1652         }
1653
1654         nchildren = nilfs_btree_node_get_nchildren(node);
1655         if (nchildren < nitems)
1656                 nitems = nchildren;
1657         dkeys = nilfs_btree_node_dkeys(node);
1658         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1659         for (i = 0; i < nitems; i++) {
1660                 keys[i] = le64_to_cpu(dkeys[i]);
1661                 ptrs[i] = le64_to_cpu(dptrs[i]);
1662         }
1663
1664         if (bh != NULL)
1665                 brelse(bh);
1666
1667         return nitems;
1668 }
1669
1670 static int
1671 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1672                                        union nilfs_bmap_ptr_req *dreq,
1673                                        union nilfs_bmap_ptr_req *nreq,
1674                                        struct buffer_head **bhp,
1675                                        struct nilfs_bmap_stats *stats)
1676 {
1677         struct buffer_head *bh;
1678         struct inode *dat = NULL;
1679         int ret;
1680
1681         stats->bs_nblocks = 0;
1682
1683         /* for data */
1684         /* cannot find near ptr */
1685         if (NILFS_BMAP_USE_VBN(btree)) {
1686                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1687                 dat = nilfs_bmap_get_dat(btree);
1688         }
1689
1690         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1691         if (ret < 0)
1692                 return ret;
1693
1694         *bhp = NULL;
1695         stats->bs_nblocks++;
1696         if (nreq != NULL) {
1697                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1698                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1699                 if (ret < 0)
1700                         goto err_out_dreq;
1701
1702                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1703                 if (ret < 0)
1704                         goto err_out_nreq;
1705
1706                 *bhp = bh;
1707                 stats->bs_nblocks++;
1708         }
1709
1710         /* success */
1711         return 0;
1712
1713         /* error */
1714  err_out_nreq:
1715         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1716  err_out_dreq:
1717         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1718         stats->bs_nblocks = 0;
1719         return ret;
1720
1721 }
1722
1723 static void
1724 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1725                                       __u64 key, __u64 ptr,
1726                                       const __u64 *keys, const __u64 *ptrs,
1727                                       int n,
1728                                       union nilfs_bmap_ptr_req *dreq,
1729                                       union nilfs_bmap_ptr_req *nreq,
1730                                       struct buffer_head *bh)
1731 {
1732         struct nilfs_btree_node *node;
1733         struct inode *dat;
1734         __u64 tmpptr;
1735         int ncblk;
1736
1737         /* free resources */
1738         if (btree->b_ops->bop_clear != NULL)
1739                 btree->b_ops->bop_clear(btree);
1740
1741         /* ptr must be a pointer to a buffer head. */
1742         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1743
1744         /* convert and insert */
1745         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1746         __nilfs_btree_init(btree);
1747         if (nreq != NULL) {
1748                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1749                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1750
1751                 /* create child node at level 1 */
1752                 node = (struct nilfs_btree_node *)bh->b_data;
1753                 ncblk = nilfs_btree_nchildren_per_block(btree);
1754                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1755                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1756                 if (!buffer_dirty(bh))
1757                         mark_buffer_dirty(bh);
1758                 if (!nilfs_bmap_dirty(btree))
1759                         nilfs_bmap_set_dirty(btree);
1760
1761                 brelse(bh);
1762
1763                 /* create root node at level 2 */
1764                 node = nilfs_btree_get_root(btree);
1765                 tmpptr = nreq->bpr_ptr;
1766                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1767                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1768                                       &keys[0], &tmpptr);
1769         } else {
1770                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1771
1772                 /* create root node at level 1 */
1773                 node = nilfs_btree_get_root(btree);
1774                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1775                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1776                                       keys, ptrs);
1777                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1778                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1779                 if (!nilfs_bmap_dirty(btree))
1780                         nilfs_bmap_set_dirty(btree);
1781         }
1782
1783         if (NILFS_BMAP_USE_VBN(btree))
1784                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1785 }
1786
1787 /**
1788  * nilfs_btree_convert_and_insert -
1789  * @bmap:
1790  * @key:
1791  * @ptr:
1792  * @keys:
1793  * @ptrs:
1794  * @n:
1795  */
1796 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1797                                    __u64 key, __u64 ptr,
1798                                    const __u64 *keys, const __u64 *ptrs, int n)
1799 {
1800         struct buffer_head *bh;
1801         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1802         struct nilfs_bmap_stats stats;
1803         int ret;
1804
1805         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1806                 di = &dreq;
1807                 ni = NULL;
1808         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1809                            1 << btree->b_inode->i_blkbits)) {
1810                 di = &dreq;
1811                 ni = &nreq;
1812         } else {
1813                 di = NULL;
1814                 ni = NULL;
1815                 BUG();
1816         }
1817
1818         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1819                                                      &stats);
1820         if (ret < 0)
1821                 return ret;
1822         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1823                                               di, ni, bh);
1824         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1825         return 0;
1826 }
1827
1828 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1829                                    struct nilfs_btree_path *path,
1830                                    int level,
1831                                    struct buffer_head *bh)
1832 {
1833         while ((++level < nilfs_btree_height(btree) - 1) &&
1834                !buffer_dirty(path[level].bp_bh))
1835                 mark_buffer_dirty(path[level].bp_bh);
1836
1837         return 0;
1838 }
1839
1840 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1841                                         struct nilfs_btree_path *path,
1842                                         int level, struct inode *dat)
1843 {
1844         struct nilfs_btree_node *parent;
1845         int ncmax, ret;
1846
1847         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1848         path[level].bp_oldreq.bpr_ptr =
1849                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1850                                          ncmax);
1851         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1852         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1853                                        &path[level].bp_newreq.bpr_req);
1854         if (ret < 0)
1855                 return ret;
1856
1857         if (buffer_nilfs_node(path[level].bp_bh)) {
1858                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1859                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1860                 path[level].bp_ctxt.bh = path[level].bp_bh;
1861                 ret = nilfs_btnode_prepare_change_key(
1862                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1863                         &path[level].bp_ctxt);
1864                 if (ret < 0) {
1865                         nilfs_dat_abort_update(dat,
1866                                                &path[level].bp_oldreq.bpr_req,
1867                                                &path[level].bp_newreq.bpr_req);
1868                         return ret;
1869                 }
1870         }
1871
1872         return 0;
1873 }
1874
1875 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1876                                         struct nilfs_btree_path *path,
1877                                         int level, struct inode *dat)
1878 {
1879         struct nilfs_btree_node *parent;
1880         int ncmax;
1881
1882         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1883                                 &path[level].bp_newreq.bpr_req,
1884                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1885
1886         if (buffer_nilfs_node(path[level].bp_bh)) {
1887                 nilfs_btnode_commit_change_key(
1888                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1889                         &path[level].bp_ctxt);
1890                 path[level].bp_bh = path[level].bp_ctxt.bh;
1891         }
1892         set_buffer_nilfs_volatile(path[level].bp_bh);
1893
1894         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1895         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1896                                  path[level].bp_newreq.bpr_ptr, ncmax);
1897 }
1898
1899 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1900                                        struct nilfs_btree_path *path,
1901                                        int level, struct inode *dat)
1902 {
1903         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1904                                &path[level].bp_newreq.bpr_req);
1905         if (buffer_nilfs_node(path[level].bp_bh))
1906                 nilfs_btnode_abort_change_key(
1907                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1908                         &path[level].bp_ctxt);
1909 }
1910
1911 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1912                                            struct nilfs_btree_path *path,
1913                                            int minlevel, int *maxlevelp,
1914                                            struct inode *dat)
1915 {
1916         int level, ret;
1917
1918         level = minlevel;
1919         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1920                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1921                 if (ret < 0)
1922                         return ret;
1923         }
1924         while ((++level < nilfs_btree_height(btree) - 1) &&
1925                !buffer_dirty(path[level].bp_bh)) {
1926
1927                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1928                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1929                 if (ret < 0)
1930                         goto out;
1931         }
1932
1933         /* success */
1934         *maxlevelp = level - 1;
1935         return 0;
1936
1937         /* error */
1938  out:
1939         while (--level > minlevel)
1940                 nilfs_btree_abort_update_v(btree, path, level, dat);
1941         if (!buffer_nilfs_volatile(path[level].bp_bh))
1942                 nilfs_btree_abort_update_v(btree, path, level, dat);
1943         return ret;
1944 }
1945
1946 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
1947                                            struct nilfs_btree_path *path,
1948                                            int minlevel, int maxlevel,
1949                                            struct buffer_head *bh,
1950                                            struct inode *dat)
1951 {
1952         int level;
1953
1954         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1955                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1956
1957         for (level = minlevel + 1; level <= maxlevel; level++)
1958                 nilfs_btree_commit_update_v(btree, path, level, dat);
1959 }
1960
1961 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
1962                                    struct nilfs_btree_path *path,
1963                                    int level, struct buffer_head *bh)
1964 {
1965         int maxlevel = 0, ret;
1966         struct nilfs_btree_node *parent;
1967         struct inode *dat = nilfs_bmap_get_dat(btree);
1968         __u64 ptr;
1969         int ncmax;
1970
1971         get_bh(bh);
1972         path[level].bp_bh = bh;
1973         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1974                                               dat);
1975         if (ret < 0)
1976                 goto out;
1977
1978         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1979                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1980                 ptr = nilfs_btree_node_get_ptr(parent,
1981                                                path[level + 1].bp_index,
1982                                                ncmax);
1983                 ret = nilfs_dat_mark_dirty(dat, ptr);
1984                 if (ret < 0)
1985                         goto out;
1986         }
1987
1988         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1989
1990  out:
1991         brelse(path[level].bp_bh);
1992         path[level].bp_bh = NULL;
1993         return ret;
1994 }
1995
1996 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
1997                                  struct buffer_head *bh)
1998 {
1999         struct nilfs_btree_path *path;
2000         struct nilfs_btree_node *node;
2001         __u64 key;
2002         int level, ret;
2003
2004         WARN_ON(!buffer_dirty(bh));
2005
2006         path = nilfs_btree_alloc_path();
2007         if (path == NULL)
2008                 return -ENOMEM;
2009
2010         if (buffer_nilfs_node(bh)) {
2011                 node = (struct nilfs_btree_node *)bh->b_data;
2012                 key = nilfs_btree_node_get_key(node, 0);
2013                 level = nilfs_btree_node_get_level(node);
2014         } else {
2015                 key = nilfs_bmap_data_get_key(btree, bh);
2016                 level = NILFS_BTREE_LEVEL_DATA;
2017         }
2018
2019         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2020         if (ret < 0) {
2021                 if (unlikely(ret == -ENOENT))
2022                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
2023                                __func__, (unsigned long long)key, level);
2024                 goto out;
2025         }
2026
2027         ret = NILFS_BMAP_USE_VBN(btree) ?
2028                 nilfs_btree_propagate_v(btree, path, level, bh) :
2029                 nilfs_btree_propagate_p(btree, path, level, bh);
2030
2031  out:
2032         nilfs_btree_free_path(path);
2033
2034         return ret;
2035 }
2036
2037 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2038                                     struct buffer_head *bh)
2039 {
2040         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2041 }
2042
2043 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2044                                          struct list_head *lists,
2045                                          struct buffer_head *bh)
2046 {
2047         struct list_head *head;
2048         struct buffer_head *cbh;
2049         struct nilfs_btree_node *node, *cnode;
2050         __u64 key, ckey;
2051         int level;
2052
2053         get_bh(bh);
2054         node = (struct nilfs_btree_node *)bh->b_data;
2055         key = nilfs_btree_node_get_key(node, 0);
2056         level = nilfs_btree_node_get_level(node);
2057         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2058             level >= NILFS_BTREE_LEVEL_MAX) {
2059                 dump_stack();
2060                 printk(KERN_WARNING
2061                        "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2062                        "blocknr=%llu)\n",
2063                        __func__, level, (unsigned long long)key,
2064                        NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2065                        (unsigned long long)bh->b_blocknr);
2066                 return;
2067         }
2068
2069         list_for_each(head, &lists[level]) {
2070                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2071                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2072                 ckey = nilfs_btree_node_get_key(cnode, 0);
2073                 if (key < ckey)
2074                         break;
2075         }
2076         list_add_tail(&bh->b_assoc_buffers, head);
2077 }
2078
2079 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2080                                              struct list_head *listp)
2081 {
2082         struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2083         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2084         struct pagevec pvec;
2085         struct buffer_head *bh, *head;
2086         pgoff_t index = 0;
2087         int level, i;
2088
2089         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2090              level < NILFS_BTREE_LEVEL_MAX;
2091              level++)
2092                 INIT_LIST_HEAD(&lists[level]);
2093
2094         pagevec_init(&pvec, 0);
2095
2096         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2097                                   PAGEVEC_SIZE)) {
2098                 for (i = 0; i < pagevec_count(&pvec); i++) {
2099                         bh = head = page_buffers(pvec.pages[i]);
2100                         do {
2101                                 if (buffer_dirty(bh))
2102                                         nilfs_btree_add_dirty_buffer(btree,
2103                                                                      lists, bh);
2104                         } while ((bh = bh->b_this_page) != head);
2105                 }
2106                 pagevec_release(&pvec);
2107                 cond_resched();
2108         }
2109
2110         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2111              level < NILFS_BTREE_LEVEL_MAX;
2112              level++)
2113                 list_splice_tail(&lists[level], listp);
2114 }
2115
2116 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2117                                 struct nilfs_btree_path *path,
2118                                 int level,
2119                                 struct buffer_head **bh,
2120                                 sector_t blocknr,
2121                                 union nilfs_binfo *binfo)
2122 {
2123         struct nilfs_btree_node *parent;
2124         __u64 key;
2125         __u64 ptr;
2126         int ncmax, ret;
2127
2128         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2129         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2130                                        ncmax);
2131         if (buffer_nilfs_node(*bh)) {
2132                 path[level].bp_ctxt.oldkey = ptr;
2133                 path[level].bp_ctxt.newkey = blocknr;
2134                 path[level].bp_ctxt.bh = *bh;
2135                 ret = nilfs_btnode_prepare_change_key(
2136                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2137                         &path[level].bp_ctxt);
2138                 if (ret < 0)
2139                         return ret;
2140                 nilfs_btnode_commit_change_key(
2141                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2142                         &path[level].bp_ctxt);
2143                 *bh = path[level].bp_ctxt.bh;
2144         }
2145
2146         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2147                                  ncmax);
2148
2149         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2150         /* on-disk format */
2151         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2152         binfo->bi_dat.bi_level = level;
2153
2154         return 0;
2155 }
2156
2157 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2158                                 struct nilfs_btree_path *path,
2159                                 int level,
2160                                 struct buffer_head **bh,
2161                                 sector_t blocknr,
2162                                 union nilfs_binfo *binfo)
2163 {
2164         struct nilfs_btree_node *parent;
2165         struct inode *dat = nilfs_bmap_get_dat(btree);
2166         __u64 key;
2167         __u64 ptr;
2168         union nilfs_bmap_ptr_req req;
2169         int ncmax, ret;
2170
2171         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2172         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2173                                        ncmax);
2174         req.bpr_ptr = ptr;
2175         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2176         if (ret < 0)
2177                 return ret;
2178         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2179
2180         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2181         /* on-disk format */
2182         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2183         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2184
2185         return 0;
2186 }
2187
2188 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2189                               struct buffer_head **bh,
2190                               sector_t blocknr,
2191                               union nilfs_binfo *binfo)
2192 {
2193         struct nilfs_btree_path *path;
2194         struct nilfs_btree_node *node;
2195         __u64 key;
2196         int level, ret;
2197
2198         path = nilfs_btree_alloc_path();
2199         if (path == NULL)
2200                 return -ENOMEM;
2201
2202         if (buffer_nilfs_node(*bh)) {
2203                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2204                 key = nilfs_btree_node_get_key(node, 0);
2205                 level = nilfs_btree_node_get_level(node);
2206         } else {
2207                 key = nilfs_bmap_data_get_key(btree, *bh);
2208                 level = NILFS_BTREE_LEVEL_DATA;
2209         }
2210
2211         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2212         if (ret < 0) {
2213                 WARN_ON(ret == -ENOENT);
2214                 goto out;
2215         }
2216
2217         ret = NILFS_BMAP_USE_VBN(btree) ?
2218                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2219                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2220
2221  out:
2222         nilfs_btree_free_path(path);
2223
2224         return ret;
2225 }
2226
2227 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2228                                  struct buffer_head **bh,
2229                                  sector_t blocknr,
2230                                  union nilfs_binfo *binfo)
2231 {
2232         struct nilfs_btree_node *node;
2233         __u64 key;
2234         int ret;
2235
2236         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2237                              blocknr);
2238         if (ret < 0)
2239                 return ret;
2240
2241         if (buffer_nilfs_node(*bh)) {
2242                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2243                 key = nilfs_btree_node_get_key(node, 0);
2244         } else
2245                 key = nilfs_bmap_data_get_key(btree, *bh);
2246
2247         /* on-disk format */
2248         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2249         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2250
2251         return 0;
2252 }
2253
2254 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2255 {
2256         struct buffer_head *bh;
2257         struct nilfs_btree_path *path;
2258         __u64 ptr;
2259         int ret;
2260
2261         path = nilfs_btree_alloc_path();
2262         if (path == NULL)
2263                 return -ENOMEM;
2264
2265         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2266         if (ret < 0) {
2267                 WARN_ON(ret == -ENOENT);
2268                 goto out;
2269         }
2270         ret = nilfs_btree_get_block(btree, ptr, &bh);
2271         if (ret < 0) {
2272                 WARN_ON(ret == -ENOENT);
2273                 goto out;
2274         }
2275
2276         if (!buffer_dirty(bh))
2277                 mark_buffer_dirty(bh);
2278         brelse(bh);
2279         if (!nilfs_bmap_dirty(btree))
2280                 nilfs_bmap_set_dirty(btree);
2281
2282  out:
2283         nilfs_btree_free_path(path);
2284         return ret;
2285 }
2286
2287 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2288         .bop_lookup             =       nilfs_btree_lookup,
2289         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2290         .bop_insert             =       nilfs_btree_insert,
2291         .bop_delete             =       nilfs_btree_delete,
2292         .bop_clear              =       NULL,
2293
2294         .bop_propagate          =       nilfs_btree_propagate,
2295
2296         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2297
2298         .bop_assign             =       nilfs_btree_assign,
2299         .bop_mark               =       nilfs_btree_mark,
2300
2301         .bop_last_key           =       nilfs_btree_last_key,
2302         .bop_check_insert       =       NULL,
2303         .bop_check_delete       =       nilfs_btree_check_delete,
2304         .bop_gather_data        =       nilfs_btree_gather_data,
2305 };
2306
2307 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2308         .bop_lookup             =       NULL,
2309         .bop_lookup_contig      =       NULL,
2310         .bop_insert             =       NULL,
2311         .bop_delete             =       NULL,
2312         .bop_clear              =       NULL,
2313
2314         .bop_propagate          =       nilfs_btree_propagate_gc,
2315
2316         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2317
2318         .bop_assign             =       nilfs_btree_assign_gc,
2319         .bop_mark               =       NULL,
2320
2321         .bop_last_key           =       NULL,
2322         .bop_check_insert       =       NULL,
2323         .bop_check_delete       =       NULL,
2324         .bop_gather_data        =       NULL,
2325 };
2326
2327 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2328 {
2329         bmap->b_ops = &nilfs_btree_ops;
2330         bmap->b_nchildren_per_block =
2331                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2332 }
2333
2334 int nilfs_btree_init(struct nilfs_bmap *bmap)
2335 {
2336         int ret = 0;
2337
2338         __nilfs_btree_init(bmap);
2339
2340         if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
2341                                     bmap->b_inode->i_ino))
2342                 ret = -EIO;
2343         return ret;
2344 }
2345
2346 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2347 {
2348         bmap->b_ops = &nilfs_btree_ops_gc;
2349         bmap->b_nchildren_per_block =
2350                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2351 }