]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/nilfs2/btree.c
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
[karo-tx-linux.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * Written by Koji Sato.
17  */
18
19 #include <linux/slab.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/pagevec.h>
23 #include "nilfs.h"
24 #include "page.h"
25 #include "btnode.h"
26 #include "btree.h"
27 #include "alloc.h"
28 #include "dat.h"
29
30 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
31
32 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
33 {
34         struct nilfs_btree_path *path;
35         int level = NILFS_BTREE_LEVEL_DATA;
36
37         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
38         if (path == NULL)
39                 goto out;
40
41         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
42                 path[level].bp_bh = NULL;
43                 path[level].bp_sib_bh = NULL;
44                 path[level].bp_index = 0;
45                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
46                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
47                 path[level].bp_op = NULL;
48         }
49
50 out:
51         return path;
52 }
53
54 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
55 {
56         int level = NILFS_BTREE_LEVEL_DATA;
57
58         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
59                 brelse(path[level].bp_bh);
60
61         kmem_cache_free(nilfs_btree_path_cache, path);
62 }
63
64 /*
65  * B-tree node operations
66  */
67 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
68                                      __u64 ptr, struct buffer_head **bhp)
69 {
70         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
71         struct buffer_head *bh;
72
73         bh = nilfs_btnode_create_block(btnc, ptr);
74         if (!bh)
75                 return -ENOMEM;
76
77         set_buffer_nilfs_volatile(bh);
78         *bhp = bh;
79         return 0;
80 }
81
82 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
83 {
84         return node->bn_flags;
85 }
86
87 static void
88 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
89 {
90         node->bn_flags = flags;
91 }
92
93 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
94 {
95         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
96 }
97
98 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
99 {
100         return node->bn_level;
101 }
102
103 static void
104 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
105 {
106         node->bn_level = level;
107 }
108
109 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
110 {
111         return le16_to_cpu(node->bn_nchildren);
112 }
113
114 static void
115 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
116 {
117         node->bn_nchildren = cpu_to_le16(nchildren);
118 }
119
120 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
121 {
122         return i_blocksize(btree->b_inode);
123 }
124
125 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
126 {
127         return btree->b_nchildren_per_block;
128 }
129
130 static __le64 *
131 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
132 {
133         return (__le64 *)((char *)(node + 1) +
134                           (nilfs_btree_node_root(node) ?
135                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
136 }
137
138 static __le64 *
139 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
140 {
141         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
142 }
143
144 static __u64
145 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
146 {
147         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
148 }
149
150 static void
151 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
152 {
153         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
154 }
155
156 static __u64
157 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
158                          int ncmax)
159 {
160         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
161 }
162
163 static void
164 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
165                          int ncmax)
166 {
167         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
168 }
169
170 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
171                                   int level, int nchildren, int ncmax,
172                                   const __u64 *keys, const __u64 *ptrs)
173 {
174         __le64 *dkeys;
175         __le64 *dptrs;
176         int i;
177
178         nilfs_btree_node_set_flags(node, flags);
179         nilfs_btree_node_set_level(node, level);
180         nilfs_btree_node_set_nchildren(node, nchildren);
181
182         dkeys = nilfs_btree_node_dkeys(node);
183         dptrs = nilfs_btree_node_dptrs(node, ncmax);
184         for (i = 0; i < nchildren; i++) {
185                 dkeys[i] = cpu_to_le64(keys[i]);
186                 dptrs[i] = cpu_to_le64(ptrs[i]);
187         }
188 }
189
190 /* Assume the buffer heads corresponding to left and right are locked. */
191 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
192                                        struct nilfs_btree_node *right,
193                                        int n, int lncmax, int rncmax)
194 {
195         __le64 *ldkeys, *rdkeys;
196         __le64 *ldptrs, *rdptrs;
197         int lnchildren, rnchildren;
198
199         ldkeys = nilfs_btree_node_dkeys(left);
200         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
201         lnchildren = nilfs_btree_node_get_nchildren(left);
202
203         rdkeys = nilfs_btree_node_dkeys(right);
204         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
205         rnchildren = nilfs_btree_node_get_nchildren(right);
206
207         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
208         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
209         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
210         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
211
212         lnchildren += n;
213         rnchildren -= n;
214         nilfs_btree_node_set_nchildren(left, lnchildren);
215         nilfs_btree_node_set_nchildren(right, rnchildren);
216 }
217
218 /* Assume that the buffer heads corresponding to left and right are locked. */
219 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
220                                         struct nilfs_btree_node *right,
221                                         int n, int lncmax, int rncmax)
222 {
223         __le64 *ldkeys, *rdkeys;
224         __le64 *ldptrs, *rdptrs;
225         int lnchildren, rnchildren;
226
227         ldkeys = nilfs_btree_node_dkeys(left);
228         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
229         lnchildren = nilfs_btree_node_get_nchildren(left);
230
231         rdkeys = nilfs_btree_node_dkeys(right);
232         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
233         rnchildren = nilfs_btree_node_get_nchildren(right);
234
235         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
236         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
237         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
238         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
239
240         lnchildren -= n;
241         rnchildren += n;
242         nilfs_btree_node_set_nchildren(left, lnchildren);
243         nilfs_btree_node_set_nchildren(right, rnchildren);
244 }
245
246 /* Assume that the buffer head corresponding to node is locked. */
247 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
248                                     __u64 key, __u64 ptr, int ncmax)
249 {
250         __le64 *dkeys;
251         __le64 *dptrs;
252         int nchildren;
253
254         dkeys = nilfs_btree_node_dkeys(node);
255         dptrs = nilfs_btree_node_dptrs(node, ncmax);
256         nchildren = nilfs_btree_node_get_nchildren(node);
257         if (index < nchildren) {
258                 memmove(dkeys + index + 1, dkeys + index,
259                         (nchildren - index) * sizeof(*dkeys));
260                 memmove(dptrs + index + 1, dptrs + index,
261                         (nchildren - index) * sizeof(*dptrs));
262         }
263         dkeys[index] = cpu_to_le64(key);
264         dptrs[index] = cpu_to_le64(ptr);
265         nchildren++;
266         nilfs_btree_node_set_nchildren(node, nchildren);
267 }
268
269 /* Assume that the buffer head corresponding to node is locked. */
270 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
271                                     __u64 *keyp, __u64 *ptrp, int ncmax)
272 {
273         __u64 key;
274         __u64 ptr;
275         __le64 *dkeys;
276         __le64 *dptrs;
277         int nchildren;
278
279         dkeys = nilfs_btree_node_dkeys(node);
280         dptrs = nilfs_btree_node_dptrs(node, ncmax);
281         key = le64_to_cpu(dkeys[index]);
282         ptr = le64_to_cpu(dptrs[index]);
283         nchildren = nilfs_btree_node_get_nchildren(node);
284         if (keyp != NULL)
285                 *keyp = key;
286         if (ptrp != NULL)
287                 *ptrp = ptr;
288
289         if (index < nchildren - 1) {
290                 memmove(dkeys + index, dkeys + index + 1,
291                         (nchildren - index - 1) * sizeof(*dkeys));
292                 memmove(dptrs + index, dptrs + index + 1,
293                         (nchildren - index - 1) * sizeof(*dptrs));
294         }
295         nchildren--;
296         nilfs_btree_node_set_nchildren(node, nchildren);
297 }
298
299 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
300                                    __u64 key, int *indexp)
301 {
302         __u64 nkey;
303         int index, low, high, s;
304
305         /* binary search */
306         low = 0;
307         high = nilfs_btree_node_get_nchildren(node) - 1;
308         index = 0;
309         s = 0;
310         while (low <= high) {
311                 index = (low + high) / 2;
312                 nkey = nilfs_btree_node_get_key(node, index);
313                 if (nkey == key) {
314                         s = 0;
315                         goto out;
316                 } else if (nkey < key) {
317                         low = index + 1;
318                         s = -1;
319                 } else {
320                         high = index - 1;
321                         s = 1;
322                 }
323         }
324
325         /* adjust index */
326         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
327                 if (s > 0 && index > 0)
328                         index--;
329         } else if (s < 0)
330                 index++;
331
332  out:
333         *indexp = index;
334
335         return s == 0;
336 }
337
338 /**
339  * nilfs_btree_node_broken - verify consistency of btree node
340  * @node: btree node block to be examined
341  * @size: node size (in bytes)
342  * @inode: host inode of btree
343  * @blocknr: block number
344  *
345  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
346  */
347 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
348                                    size_t size, struct inode *inode,
349                                    sector_t blocknr)
350 {
351         int level, flags, nchildren;
352         int ret = 0;
353
354         level = nilfs_btree_node_get_level(node);
355         flags = nilfs_btree_node_get_flags(node);
356         nchildren = nilfs_btree_node_get_nchildren(node);
357
358         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
359                      level >= NILFS_BTREE_LEVEL_MAX ||
360                      (flags & NILFS_BTREE_NODE_ROOT) ||
361                      nchildren < 0 ||
362                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
363                 nilfs_msg(inode->i_sb, KERN_CRIT,
364                           "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
365                           inode->i_ino, (unsigned long long)blocknr, level,
366                           flags, nchildren);
367                 ret = 1;
368         }
369         return ret;
370 }
371
372 /**
373  * nilfs_btree_root_broken - verify consistency of btree root node
374  * @node: btree root node to be examined
375  * @inode: host inode of btree
376  *
377  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
378  */
379 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
380                                    struct inode *inode)
381 {
382         int level, flags, nchildren;
383         int ret = 0;
384
385         level = nilfs_btree_node_get_level(node);
386         flags = nilfs_btree_node_get_flags(node);
387         nchildren = nilfs_btree_node_get_nchildren(node);
388
389         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
390                      level >= NILFS_BTREE_LEVEL_MAX ||
391                      nchildren < 0 ||
392                      nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
393                 nilfs_msg(inode->i_sb, KERN_CRIT,
394                           "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
395                           inode->i_ino, level, flags, nchildren);
396                 ret = 1;
397         }
398         return ret;
399 }
400
401 int nilfs_btree_broken_node_block(struct buffer_head *bh)
402 {
403         struct inode *inode;
404         int ret;
405
406         if (buffer_nilfs_checked(bh))
407                 return 0;
408
409         inode = bh->b_page->mapping->host;
410         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
411                                       bh->b_size, inode, bh->b_blocknr);
412         if (likely(!ret))
413                 set_buffer_nilfs_checked(bh);
414         return ret;
415 }
416
417 static struct nilfs_btree_node *
418 nilfs_btree_get_root(const struct nilfs_bmap *btree)
419 {
420         return (struct nilfs_btree_node *)btree->b_u.u_data;
421 }
422
423 static struct nilfs_btree_node *
424 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
425 {
426         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
427 }
428
429 static struct nilfs_btree_node *
430 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
431 {
432         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
433 }
434
435 static int nilfs_btree_height(const struct nilfs_bmap *btree)
436 {
437         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
438 }
439
440 static struct nilfs_btree_node *
441 nilfs_btree_get_node(const struct nilfs_bmap *btree,
442                      const struct nilfs_btree_path *path,
443                      int level, int *ncmaxp)
444 {
445         struct nilfs_btree_node *node;
446
447         if (level == nilfs_btree_height(btree) - 1) {
448                 node = nilfs_btree_get_root(btree);
449                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
450         } else {
451                 node = nilfs_btree_get_nonroot_node(path, level);
452                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
453         }
454         return node;
455 }
456
457 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
458                                 struct nilfs_btree_node *node, int level)
459 {
460         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
461                 dump_stack();
462                 nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
463                           "btree level mismatch (ino=%lu): %d != %d",
464                           btree->b_inode->i_ino,
465                           nilfs_btree_node_get_level(node), level);
466                 return 1;
467         }
468         return 0;
469 }
470
471 struct nilfs_btree_readahead_info {
472         struct nilfs_btree_node *node;  /* parent node */
473         int max_ra_blocks;              /* max nof blocks to read ahead */
474         int index;                      /* current index on the parent node */
475         int ncmax;                      /* nof children in the parent node */
476 };
477
478 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
479                                    struct buffer_head **bhp,
480                                    const struct nilfs_btree_readahead_info *ra)
481 {
482         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
483         struct buffer_head *bh, *ra_bh;
484         sector_t submit_ptr = 0;
485         int ret;
486
487         ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
488                                         &submit_ptr);
489         if (ret) {
490                 if (ret != -EEXIST)
491                         return ret;
492                 goto out_check;
493         }
494
495         if (ra) {
496                 int i, n;
497                 __u64 ptr2;
498
499                 /* read ahead sibling nodes */
500                 for (n = ra->max_ra_blocks, i = ra->index + 1;
501                      n > 0 && i < ra->ncmax; n--, i++) {
502                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
503
504                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
505                                                         REQ_OP_READ, REQ_RAHEAD,
506                                                         &ra_bh, &submit_ptr);
507                         if (likely(!ret || ret == -EEXIST))
508                                 brelse(ra_bh);
509                         else if (ret != -EBUSY)
510                                 break;
511                         if (!buffer_locked(bh))
512                                 goto out_no_wait;
513                 }
514         }
515
516         wait_on_buffer(bh);
517
518  out_no_wait:
519         if (!buffer_uptodate(bh)) {
520                 nilfs_msg(btree->b_inode->i_sb, KERN_ERR,
521                           "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
522                           btree->b_inode->i_ino, (unsigned long long)ptr);
523                 brelse(bh);
524                 return -EIO;
525         }
526
527  out_check:
528         if (nilfs_btree_broken_node_block(bh)) {
529                 clear_buffer_uptodate(bh);
530                 brelse(bh);
531                 return -EINVAL;
532         }
533
534         *bhp = bh;
535         return 0;
536 }
537
538 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
539                                    struct buffer_head **bhp)
540 {
541         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
542 }
543
544 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
545                                  struct nilfs_btree_path *path,
546                                  __u64 key, __u64 *ptrp, int minlevel,
547                                  int readahead)
548 {
549         struct nilfs_btree_node *node;
550         struct nilfs_btree_readahead_info p, *ra;
551         __u64 ptr;
552         int level, index, found, ncmax, ret;
553
554         node = nilfs_btree_get_root(btree);
555         level = nilfs_btree_node_get_level(node);
556         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
557                 return -ENOENT;
558
559         found = nilfs_btree_node_lookup(node, key, &index);
560         ptr = nilfs_btree_node_get_ptr(node, index,
561                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
562         path[level].bp_bh = NULL;
563         path[level].bp_index = index;
564
565         ncmax = nilfs_btree_nchildren_per_block(btree);
566
567         while (--level >= minlevel) {
568                 ra = NULL;
569                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
570                         p.node = nilfs_btree_get_node(btree, path, level + 1,
571                                                       &p.ncmax);
572                         p.index = index;
573                         p.max_ra_blocks = 7;
574                         ra = &p;
575                 }
576                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
577                                               ra);
578                 if (ret < 0)
579                         return ret;
580
581                 node = nilfs_btree_get_nonroot_node(path, level);
582                 if (nilfs_btree_bad_node(btree, node, level))
583                         return -EINVAL;
584                 if (!found)
585                         found = nilfs_btree_node_lookup(node, key, &index);
586                 else
587                         index = 0;
588                 if (index < ncmax) {
589                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
590                 } else {
591                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
592                         /* insert */
593                         ptr = NILFS_BMAP_INVALID_PTR;
594                 }
595                 path[level].bp_index = index;
596         }
597         if (!found)
598                 return -ENOENT;
599
600         if (ptrp != NULL)
601                 *ptrp = ptr;
602
603         return 0;
604 }
605
606 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
607                                       struct nilfs_btree_path *path,
608                                       __u64 *keyp, __u64 *ptrp)
609 {
610         struct nilfs_btree_node *node;
611         __u64 ptr;
612         int index, level, ncmax, ret;
613
614         node = nilfs_btree_get_root(btree);
615         index = nilfs_btree_node_get_nchildren(node) - 1;
616         if (index < 0)
617                 return -ENOENT;
618         level = nilfs_btree_node_get_level(node);
619         ptr = nilfs_btree_node_get_ptr(node, index,
620                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
621         path[level].bp_bh = NULL;
622         path[level].bp_index = index;
623         ncmax = nilfs_btree_nchildren_per_block(btree);
624
625         for (level--; level > 0; level--) {
626                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
627                 if (ret < 0)
628                         return ret;
629                 node = nilfs_btree_get_nonroot_node(path, level);
630                 if (nilfs_btree_bad_node(btree, node, level))
631                         return -EINVAL;
632                 index = nilfs_btree_node_get_nchildren(node) - 1;
633                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
634                 path[level].bp_index = index;
635         }
636
637         if (keyp != NULL)
638                 *keyp = nilfs_btree_node_get_key(node, index);
639         if (ptrp != NULL)
640                 *ptrp = ptr;
641
642         return 0;
643 }
644
645 /**
646  * nilfs_btree_get_next_key - get next valid key from btree path array
647  * @btree: bmap struct of btree
648  * @path: array of nilfs_btree_path struct
649  * @minlevel: start level
650  * @nextkey: place to store the next valid key
651  *
652  * Return Value: If a next key was found, 0 is returned. Otherwise,
653  * -ENOENT is returned.
654  */
655 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
656                                     const struct nilfs_btree_path *path,
657                                     int minlevel, __u64 *nextkey)
658 {
659         struct nilfs_btree_node *node;
660         int maxlevel = nilfs_btree_height(btree) - 1;
661         int index, next_adj, level;
662
663         /* Next index is already set to bp_index for leaf nodes. */
664         next_adj = 0;
665         for (level = minlevel; level <= maxlevel; level++) {
666                 if (level == maxlevel)
667                         node = nilfs_btree_get_root(btree);
668                 else
669                         node = nilfs_btree_get_nonroot_node(path, level);
670
671                 index = path[level].bp_index + next_adj;
672                 if (index < nilfs_btree_node_get_nchildren(node)) {
673                         /* Next key is in this node */
674                         *nextkey = nilfs_btree_node_get_key(node, index);
675                         return 0;
676                 }
677                 /* For non-leaf nodes, next index is stored at bp_index + 1. */
678                 next_adj = 1;
679         }
680         return -ENOENT;
681 }
682
683 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
684                               __u64 key, int level, __u64 *ptrp)
685 {
686         struct nilfs_btree_path *path;
687         int ret;
688
689         path = nilfs_btree_alloc_path();
690         if (path == NULL)
691                 return -ENOMEM;
692
693         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
694
695         nilfs_btree_free_path(path);
696
697         return ret;
698 }
699
700 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
701                                      __u64 key, __u64 *ptrp,
702                                      unsigned int maxblocks)
703 {
704         struct nilfs_btree_path *path;
705         struct nilfs_btree_node *node;
706         struct inode *dat = NULL;
707         __u64 ptr, ptr2;
708         sector_t blocknr;
709         int level = NILFS_BTREE_LEVEL_NODE_MIN;
710         int ret, cnt, index, maxlevel, ncmax;
711         struct nilfs_btree_readahead_info p;
712
713         path = nilfs_btree_alloc_path();
714         if (path == NULL)
715                 return -ENOMEM;
716
717         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
718         if (ret < 0)
719                 goto out;
720
721         if (NILFS_BMAP_USE_VBN(btree)) {
722                 dat = nilfs_bmap_get_dat(btree);
723                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
724                 if (ret < 0)
725                         goto out;
726                 ptr = blocknr;
727         }
728         cnt = 1;
729         if (cnt == maxblocks)
730                 goto end;
731
732         maxlevel = nilfs_btree_height(btree) - 1;
733         node = nilfs_btree_get_node(btree, path, level, &ncmax);
734         index = path[level].bp_index + 1;
735         for (;;) {
736                 while (index < nilfs_btree_node_get_nchildren(node)) {
737                         if (nilfs_btree_node_get_key(node, index) !=
738                             key + cnt)
739                                 goto end;
740                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
741                         if (dat) {
742                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
743                                 if (ret < 0)
744                                         goto out;
745                                 ptr2 = blocknr;
746                         }
747                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
748                                 goto end;
749                         index++;
750                         continue;
751                 }
752                 if (level == maxlevel)
753                         break;
754
755                 /* look-up right sibling node */
756                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
757                 p.index = path[level + 1].bp_index + 1;
758                 p.max_ra_blocks = 7;
759                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
760                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
761                         break;
762                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
763                 path[level + 1].bp_index = p.index;
764
765                 brelse(path[level].bp_bh);
766                 path[level].bp_bh = NULL;
767
768                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
769                                               &p);
770                 if (ret < 0)
771                         goto out;
772                 node = nilfs_btree_get_nonroot_node(path, level);
773                 ncmax = nilfs_btree_nchildren_per_block(btree);
774                 index = 0;
775                 path[level].bp_index = index;
776         }
777  end:
778         *ptrp = ptr;
779         ret = cnt;
780  out:
781         nilfs_btree_free_path(path);
782         return ret;
783 }
784
785 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
786                                     struct nilfs_btree_path *path,
787                                     int level, __u64 key)
788 {
789         if (level < nilfs_btree_height(btree) - 1) {
790                 do {
791                         nilfs_btree_node_set_key(
792                                 nilfs_btree_get_nonroot_node(path, level),
793                                 path[level].bp_index, key);
794                         if (!buffer_dirty(path[level].bp_bh))
795                                 mark_buffer_dirty(path[level].bp_bh);
796                 } while ((path[level].bp_index == 0) &&
797                          (++level < nilfs_btree_height(btree) - 1));
798         }
799
800         /* root */
801         if (level == nilfs_btree_height(btree) - 1) {
802                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
803                                          path[level].bp_index, key);
804         }
805 }
806
807 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
808                                   struct nilfs_btree_path *path,
809                                   int level, __u64 *keyp, __u64 *ptrp)
810 {
811         struct nilfs_btree_node *node;
812         int ncblk;
813
814         if (level < nilfs_btree_height(btree) - 1) {
815                 node = nilfs_btree_get_nonroot_node(path, level);
816                 ncblk = nilfs_btree_nchildren_per_block(btree);
817                 nilfs_btree_node_insert(node, path[level].bp_index,
818                                         *keyp, *ptrp, ncblk);
819                 if (!buffer_dirty(path[level].bp_bh))
820                         mark_buffer_dirty(path[level].bp_bh);
821
822                 if (path[level].bp_index == 0)
823                         nilfs_btree_promote_key(btree, path, level + 1,
824                                                 nilfs_btree_node_get_key(node,
825                                                                          0));
826         } else {
827                 node = nilfs_btree_get_root(btree);
828                 nilfs_btree_node_insert(node, path[level].bp_index,
829                                         *keyp, *ptrp,
830                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
831         }
832 }
833
834 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
835                                    struct nilfs_btree_path *path,
836                                    int level, __u64 *keyp, __u64 *ptrp)
837 {
838         struct nilfs_btree_node *node, *left;
839         int nchildren, lnchildren, n, move, ncblk;
840
841         node = nilfs_btree_get_nonroot_node(path, level);
842         left = nilfs_btree_get_sib_node(path, level);
843         nchildren = nilfs_btree_node_get_nchildren(node);
844         lnchildren = nilfs_btree_node_get_nchildren(left);
845         ncblk = nilfs_btree_nchildren_per_block(btree);
846         move = 0;
847
848         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
849         if (n > path[level].bp_index) {
850                 /* move insert point */
851                 n--;
852                 move = 1;
853         }
854
855         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
856
857         if (!buffer_dirty(path[level].bp_bh))
858                 mark_buffer_dirty(path[level].bp_bh);
859         if (!buffer_dirty(path[level].bp_sib_bh))
860                 mark_buffer_dirty(path[level].bp_sib_bh);
861
862         nilfs_btree_promote_key(btree, path, level + 1,
863                                 nilfs_btree_node_get_key(node, 0));
864
865         if (move) {
866                 brelse(path[level].bp_bh);
867                 path[level].bp_bh = path[level].bp_sib_bh;
868                 path[level].bp_sib_bh = NULL;
869                 path[level].bp_index += lnchildren;
870                 path[level + 1].bp_index--;
871         } else {
872                 brelse(path[level].bp_sib_bh);
873                 path[level].bp_sib_bh = NULL;
874                 path[level].bp_index -= n;
875         }
876
877         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
878 }
879
880 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
881                                     struct nilfs_btree_path *path,
882                                     int level, __u64 *keyp, __u64 *ptrp)
883 {
884         struct nilfs_btree_node *node, *right;
885         int nchildren, rnchildren, n, move, ncblk;
886
887         node = nilfs_btree_get_nonroot_node(path, level);
888         right = nilfs_btree_get_sib_node(path, level);
889         nchildren = nilfs_btree_node_get_nchildren(node);
890         rnchildren = nilfs_btree_node_get_nchildren(right);
891         ncblk = nilfs_btree_nchildren_per_block(btree);
892         move = 0;
893
894         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
895         if (n > nchildren - path[level].bp_index) {
896                 /* move insert point */
897                 n--;
898                 move = 1;
899         }
900
901         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
902
903         if (!buffer_dirty(path[level].bp_bh))
904                 mark_buffer_dirty(path[level].bp_bh);
905         if (!buffer_dirty(path[level].bp_sib_bh))
906                 mark_buffer_dirty(path[level].bp_sib_bh);
907
908         path[level + 1].bp_index++;
909         nilfs_btree_promote_key(btree, path, level + 1,
910                                 nilfs_btree_node_get_key(right, 0));
911         path[level + 1].bp_index--;
912
913         if (move) {
914                 brelse(path[level].bp_bh);
915                 path[level].bp_bh = path[level].bp_sib_bh;
916                 path[level].bp_sib_bh = NULL;
917                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
918                 path[level + 1].bp_index++;
919         } else {
920                 brelse(path[level].bp_sib_bh);
921                 path[level].bp_sib_bh = NULL;
922         }
923
924         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
925 }
926
927 static void nilfs_btree_split(struct nilfs_bmap *btree,
928                               struct nilfs_btree_path *path,
929                               int level, __u64 *keyp, __u64 *ptrp)
930 {
931         struct nilfs_btree_node *node, *right;
932         int nchildren, n, move, ncblk;
933
934         node = nilfs_btree_get_nonroot_node(path, level);
935         right = nilfs_btree_get_sib_node(path, level);
936         nchildren = nilfs_btree_node_get_nchildren(node);
937         ncblk = nilfs_btree_nchildren_per_block(btree);
938         move = 0;
939
940         n = (nchildren + 1) / 2;
941         if (n > nchildren - path[level].bp_index) {
942                 n--;
943                 move = 1;
944         }
945
946         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
947
948         if (!buffer_dirty(path[level].bp_bh))
949                 mark_buffer_dirty(path[level].bp_bh);
950         if (!buffer_dirty(path[level].bp_sib_bh))
951                 mark_buffer_dirty(path[level].bp_sib_bh);
952
953         if (move) {
954                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
955                 nilfs_btree_node_insert(right, path[level].bp_index,
956                                         *keyp, *ptrp, ncblk);
957
958                 *keyp = nilfs_btree_node_get_key(right, 0);
959                 *ptrp = path[level].bp_newreq.bpr_ptr;
960
961                 brelse(path[level].bp_bh);
962                 path[level].bp_bh = path[level].bp_sib_bh;
963                 path[level].bp_sib_bh = NULL;
964         } else {
965                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
966
967                 *keyp = nilfs_btree_node_get_key(right, 0);
968                 *ptrp = path[level].bp_newreq.bpr_ptr;
969
970                 brelse(path[level].bp_sib_bh);
971                 path[level].bp_sib_bh = NULL;
972         }
973
974         path[level + 1].bp_index++;
975 }
976
977 static void nilfs_btree_grow(struct nilfs_bmap *btree,
978                              struct nilfs_btree_path *path,
979                              int level, __u64 *keyp, __u64 *ptrp)
980 {
981         struct nilfs_btree_node *root, *child;
982         int n, ncblk;
983
984         root = nilfs_btree_get_root(btree);
985         child = nilfs_btree_get_sib_node(path, level);
986         ncblk = nilfs_btree_nchildren_per_block(btree);
987
988         n = nilfs_btree_node_get_nchildren(root);
989
990         nilfs_btree_node_move_right(root, child, n,
991                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
992         nilfs_btree_node_set_level(root, level + 1);
993
994         if (!buffer_dirty(path[level].bp_sib_bh))
995                 mark_buffer_dirty(path[level].bp_sib_bh);
996
997         path[level].bp_bh = path[level].bp_sib_bh;
998         path[level].bp_sib_bh = NULL;
999
1000         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1001
1002         *keyp = nilfs_btree_node_get_key(child, 0);
1003         *ptrp = path[level].bp_newreq.bpr_ptr;
1004 }
1005
1006 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1007                                    const struct nilfs_btree_path *path)
1008 {
1009         struct nilfs_btree_node *node;
1010         int level, ncmax;
1011
1012         if (path == NULL)
1013                 return NILFS_BMAP_INVALID_PTR;
1014
1015         /* left sibling */
1016         level = NILFS_BTREE_LEVEL_NODE_MIN;
1017         if (path[level].bp_index > 0) {
1018                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1019                 return nilfs_btree_node_get_ptr(node,
1020                                                 path[level].bp_index - 1,
1021                                                 ncmax);
1022         }
1023
1024         /* parent */
1025         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1026         if (level <= nilfs_btree_height(btree) - 1) {
1027                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1028                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1029                                                 ncmax);
1030         }
1031
1032         return NILFS_BMAP_INVALID_PTR;
1033 }
1034
1035 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1036                                        const struct nilfs_btree_path *path,
1037                                        __u64 key)
1038 {
1039         __u64 ptr;
1040
1041         ptr = nilfs_bmap_find_target_seq(btree, key);
1042         if (ptr != NILFS_BMAP_INVALID_PTR)
1043                 /* sequential access */
1044                 return ptr;
1045
1046         ptr = nilfs_btree_find_near(btree, path);
1047         if (ptr != NILFS_BMAP_INVALID_PTR)
1048                 /* near */
1049                 return ptr;
1050
1051         /* block group */
1052         return nilfs_bmap_find_target_in_group(btree);
1053 }
1054
1055 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1056                                       struct nilfs_btree_path *path,
1057                                       int *levelp, __u64 key, __u64 ptr,
1058                                       struct nilfs_bmap_stats *stats)
1059 {
1060         struct buffer_head *bh;
1061         struct nilfs_btree_node *node, *parent, *sib;
1062         __u64 sibptr;
1063         int pindex, level, ncmax, ncblk, ret;
1064         struct inode *dat = NULL;
1065
1066         stats->bs_nblocks = 0;
1067         level = NILFS_BTREE_LEVEL_DATA;
1068
1069         /* allocate a new ptr for data block */
1070         if (NILFS_BMAP_USE_VBN(btree)) {
1071                 path[level].bp_newreq.bpr_ptr =
1072                         nilfs_btree_find_target_v(btree, path, key);
1073                 dat = nilfs_bmap_get_dat(btree);
1074         }
1075
1076         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1077         if (ret < 0)
1078                 goto err_out_data;
1079
1080         ncblk = nilfs_btree_nchildren_per_block(btree);
1081
1082         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1083              level < nilfs_btree_height(btree) - 1;
1084              level++) {
1085                 node = nilfs_btree_get_nonroot_node(path, level);
1086                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1087                         path[level].bp_op = nilfs_btree_do_insert;
1088                         stats->bs_nblocks++;
1089                         goto out;
1090                 }
1091
1092                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1093                 pindex = path[level + 1].bp_index;
1094
1095                 /* left sibling */
1096                 if (pindex > 0) {
1097                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1098                                                           ncmax);
1099                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1100                         if (ret < 0)
1101                                 goto err_out_child_node;
1102                         sib = (struct nilfs_btree_node *)bh->b_data;
1103                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1104                                 path[level].bp_sib_bh = bh;
1105                                 path[level].bp_op = nilfs_btree_carry_left;
1106                                 stats->bs_nblocks++;
1107                                 goto out;
1108                         } else {
1109                                 brelse(bh);
1110                         }
1111                 }
1112
1113                 /* right sibling */
1114                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1115                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1116                                                           ncmax);
1117                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1118                         if (ret < 0)
1119                                 goto err_out_child_node;
1120                         sib = (struct nilfs_btree_node *)bh->b_data;
1121                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1122                                 path[level].bp_sib_bh = bh;
1123                                 path[level].bp_op = nilfs_btree_carry_right;
1124                                 stats->bs_nblocks++;
1125                                 goto out;
1126                         } else {
1127                                 brelse(bh);
1128                         }
1129                 }
1130
1131                 /* split */
1132                 path[level].bp_newreq.bpr_ptr =
1133                         path[level - 1].bp_newreq.bpr_ptr + 1;
1134                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1135                                                    &path[level].bp_newreq, dat);
1136                 if (ret < 0)
1137                         goto err_out_child_node;
1138                 ret = nilfs_btree_get_new_block(btree,
1139                                                 path[level].bp_newreq.bpr_ptr,
1140                                                 &bh);
1141                 if (ret < 0)
1142                         goto err_out_curr_node;
1143
1144                 stats->bs_nblocks++;
1145
1146                 sib = (struct nilfs_btree_node *)bh->b_data;
1147                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1148                 path[level].bp_sib_bh = bh;
1149                 path[level].bp_op = nilfs_btree_split;
1150         }
1151
1152         /* root */
1153         node = nilfs_btree_get_root(btree);
1154         if (nilfs_btree_node_get_nchildren(node) <
1155             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1156                 path[level].bp_op = nilfs_btree_do_insert;
1157                 stats->bs_nblocks++;
1158                 goto out;
1159         }
1160
1161         /* grow */
1162         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1163         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1164         if (ret < 0)
1165                 goto err_out_child_node;
1166         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1167                                         &bh);
1168         if (ret < 0)
1169                 goto err_out_curr_node;
1170
1171         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1172                               0, level, 0, ncblk, NULL, NULL);
1173         path[level].bp_sib_bh = bh;
1174         path[level].bp_op = nilfs_btree_grow;
1175
1176         level++;
1177         path[level].bp_op = nilfs_btree_do_insert;
1178
1179         /* a newly-created node block and a data block are added */
1180         stats->bs_nblocks += 2;
1181
1182         /* success */
1183  out:
1184         *levelp = level;
1185         return ret;
1186
1187         /* error */
1188  err_out_curr_node:
1189         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1190  err_out_child_node:
1191         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1192                 nilfs_btnode_delete(path[level].bp_sib_bh);
1193                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1194
1195         }
1196
1197         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1198  err_out_data:
1199         *levelp = level;
1200         stats->bs_nblocks = 0;
1201         return ret;
1202 }
1203
1204 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1205                                       struct nilfs_btree_path *path,
1206                                       int maxlevel, __u64 key, __u64 ptr)
1207 {
1208         struct inode *dat = NULL;
1209         int level;
1210
1211         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1212         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1213         if (NILFS_BMAP_USE_VBN(btree)) {
1214                 nilfs_bmap_set_target_v(btree, key, ptr);
1215                 dat = nilfs_bmap_get_dat(btree);
1216         }
1217
1218         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1219                 nilfs_bmap_commit_alloc_ptr(btree,
1220                                             &path[level - 1].bp_newreq, dat);
1221                 path[level].bp_op(btree, path, level, &key, &ptr);
1222         }
1223
1224         if (!nilfs_bmap_dirty(btree))
1225                 nilfs_bmap_set_dirty(btree);
1226 }
1227
1228 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1229 {
1230         struct nilfs_btree_path *path;
1231         struct nilfs_bmap_stats stats;
1232         int level, ret;
1233
1234         path = nilfs_btree_alloc_path();
1235         if (path == NULL)
1236                 return -ENOMEM;
1237
1238         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1239                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1240         if (ret != -ENOENT) {
1241                 if (ret == 0)
1242                         ret = -EEXIST;
1243                 goto out;
1244         }
1245
1246         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1247         if (ret < 0)
1248                 goto out;
1249         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1250         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1251
1252  out:
1253         nilfs_btree_free_path(path);
1254         return ret;
1255 }
1256
1257 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1258                                   struct nilfs_btree_path *path,
1259                                   int level, __u64 *keyp, __u64 *ptrp)
1260 {
1261         struct nilfs_btree_node *node;
1262         int ncblk;
1263
1264         if (level < nilfs_btree_height(btree) - 1) {
1265                 node = nilfs_btree_get_nonroot_node(path, level);
1266                 ncblk = nilfs_btree_nchildren_per_block(btree);
1267                 nilfs_btree_node_delete(node, path[level].bp_index,
1268                                         keyp, ptrp, ncblk);
1269                 if (!buffer_dirty(path[level].bp_bh))
1270                         mark_buffer_dirty(path[level].bp_bh);
1271                 if (path[level].bp_index == 0)
1272                         nilfs_btree_promote_key(btree, path, level + 1,
1273                                 nilfs_btree_node_get_key(node, 0));
1274         } else {
1275                 node = nilfs_btree_get_root(btree);
1276                 nilfs_btree_node_delete(node, path[level].bp_index,
1277                                         keyp, ptrp,
1278                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1279         }
1280 }
1281
1282 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1283                                     struct nilfs_btree_path *path,
1284                                     int level, __u64 *keyp, __u64 *ptrp)
1285 {
1286         struct nilfs_btree_node *node, *left;
1287         int nchildren, lnchildren, n, ncblk;
1288
1289         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1290
1291         node = nilfs_btree_get_nonroot_node(path, level);
1292         left = nilfs_btree_get_sib_node(path, level);
1293         nchildren = nilfs_btree_node_get_nchildren(node);
1294         lnchildren = nilfs_btree_node_get_nchildren(left);
1295         ncblk = nilfs_btree_nchildren_per_block(btree);
1296
1297         n = (nchildren + lnchildren) / 2 - nchildren;
1298
1299         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1300
1301         if (!buffer_dirty(path[level].bp_bh))
1302                 mark_buffer_dirty(path[level].bp_bh);
1303         if (!buffer_dirty(path[level].bp_sib_bh))
1304                 mark_buffer_dirty(path[level].bp_sib_bh);
1305
1306         nilfs_btree_promote_key(btree, path, level + 1,
1307                                 nilfs_btree_node_get_key(node, 0));
1308
1309         brelse(path[level].bp_sib_bh);
1310         path[level].bp_sib_bh = NULL;
1311         path[level].bp_index += n;
1312 }
1313
1314 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1315                                      struct nilfs_btree_path *path,
1316                                      int level, __u64 *keyp, __u64 *ptrp)
1317 {
1318         struct nilfs_btree_node *node, *right;
1319         int nchildren, rnchildren, n, ncblk;
1320
1321         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1322
1323         node = nilfs_btree_get_nonroot_node(path, level);
1324         right = nilfs_btree_get_sib_node(path, level);
1325         nchildren = nilfs_btree_node_get_nchildren(node);
1326         rnchildren = nilfs_btree_node_get_nchildren(right);
1327         ncblk = nilfs_btree_nchildren_per_block(btree);
1328
1329         n = (nchildren + rnchildren) / 2 - nchildren;
1330
1331         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1332
1333         if (!buffer_dirty(path[level].bp_bh))
1334                 mark_buffer_dirty(path[level].bp_bh);
1335         if (!buffer_dirty(path[level].bp_sib_bh))
1336                 mark_buffer_dirty(path[level].bp_sib_bh);
1337
1338         path[level + 1].bp_index++;
1339         nilfs_btree_promote_key(btree, path, level + 1,
1340                                 nilfs_btree_node_get_key(right, 0));
1341         path[level + 1].bp_index--;
1342
1343         brelse(path[level].bp_sib_bh);
1344         path[level].bp_sib_bh = NULL;
1345 }
1346
1347 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1348                                     struct nilfs_btree_path *path,
1349                                     int level, __u64 *keyp, __u64 *ptrp)
1350 {
1351         struct nilfs_btree_node *node, *left;
1352         int n, ncblk;
1353
1354         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1355
1356         node = nilfs_btree_get_nonroot_node(path, level);
1357         left = nilfs_btree_get_sib_node(path, level);
1358         ncblk = nilfs_btree_nchildren_per_block(btree);
1359
1360         n = nilfs_btree_node_get_nchildren(node);
1361
1362         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1363
1364         if (!buffer_dirty(path[level].bp_sib_bh))
1365                 mark_buffer_dirty(path[level].bp_sib_bh);
1366
1367         nilfs_btnode_delete(path[level].bp_bh);
1368         path[level].bp_bh = path[level].bp_sib_bh;
1369         path[level].bp_sib_bh = NULL;
1370         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1371 }
1372
1373 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1374                                      struct nilfs_btree_path *path,
1375                                      int level, __u64 *keyp, __u64 *ptrp)
1376 {
1377         struct nilfs_btree_node *node, *right;
1378         int n, ncblk;
1379
1380         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1381
1382         node = nilfs_btree_get_nonroot_node(path, level);
1383         right = nilfs_btree_get_sib_node(path, level);
1384         ncblk = nilfs_btree_nchildren_per_block(btree);
1385
1386         n = nilfs_btree_node_get_nchildren(right);
1387
1388         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1389
1390         if (!buffer_dirty(path[level].bp_bh))
1391                 mark_buffer_dirty(path[level].bp_bh);
1392
1393         nilfs_btnode_delete(path[level].bp_sib_bh);
1394         path[level].bp_sib_bh = NULL;
1395         path[level + 1].bp_index++;
1396 }
1397
1398 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1399                                struct nilfs_btree_path *path,
1400                                int level, __u64 *keyp, __u64 *ptrp)
1401 {
1402         struct nilfs_btree_node *root, *child;
1403         int n, ncblk;
1404
1405         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1406
1407         root = nilfs_btree_get_root(btree);
1408         child = nilfs_btree_get_nonroot_node(path, level);
1409         ncblk = nilfs_btree_nchildren_per_block(btree);
1410
1411         nilfs_btree_node_delete(root, 0, NULL, NULL,
1412                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1413         nilfs_btree_node_set_level(root, level);
1414         n = nilfs_btree_node_get_nchildren(child);
1415         nilfs_btree_node_move_left(root, child, n,
1416                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1417
1418         nilfs_btnode_delete(path[level].bp_bh);
1419         path[level].bp_bh = NULL;
1420 }
1421
1422 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1423                             struct nilfs_btree_path *path,
1424                             int level, __u64 *keyp, __u64 *ptrp)
1425 {
1426 }
1427
1428 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1429                                       struct nilfs_btree_path *path,
1430                                       int *levelp,
1431                                       struct nilfs_bmap_stats *stats,
1432                                       struct inode *dat)
1433 {
1434         struct buffer_head *bh;
1435         struct nilfs_btree_node *node, *parent, *sib;
1436         __u64 sibptr;
1437         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1438
1439         ret = 0;
1440         stats->bs_nblocks = 0;
1441         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1442         ncblk = nilfs_btree_nchildren_per_block(btree);
1443
1444         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1445              level < nilfs_btree_height(btree) - 1;
1446              level++) {
1447                 node = nilfs_btree_get_nonroot_node(path, level);
1448                 path[level].bp_oldreq.bpr_ptr =
1449                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1450                 ret = nilfs_bmap_prepare_end_ptr(btree,
1451                                                  &path[level].bp_oldreq, dat);
1452                 if (ret < 0)
1453                         goto err_out_child_node;
1454
1455                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1456                         path[level].bp_op = nilfs_btree_do_delete;
1457                         stats->bs_nblocks++;
1458                         goto out;
1459                 }
1460
1461                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1462                 pindex = path[level + 1].bp_index;
1463                 dindex = pindex;
1464
1465                 if (pindex > 0) {
1466                         /* left sibling */
1467                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1468                                                           ncmax);
1469                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1470                         if (ret < 0)
1471                                 goto err_out_curr_node;
1472                         sib = (struct nilfs_btree_node *)bh->b_data;
1473                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1474                                 path[level].bp_sib_bh = bh;
1475                                 path[level].bp_op = nilfs_btree_borrow_left;
1476                                 stats->bs_nblocks++;
1477                                 goto out;
1478                         } else {
1479                                 path[level].bp_sib_bh = bh;
1480                                 path[level].bp_op = nilfs_btree_concat_left;
1481                                 stats->bs_nblocks++;
1482                                 /* continue; */
1483                         }
1484                 } else if (pindex <
1485                            nilfs_btree_node_get_nchildren(parent) - 1) {
1486                         /* right sibling */
1487                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1488                                                           ncmax);
1489                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1490                         if (ret < 0)
1491                                 goto err_out_curr_node;
1492                         sib = (struct nilfs_btree_node *)bh->b_data;
1493                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1494                                 path[level].bp_sib_bh = bh;
1495                                 path[level].bp_op = nilfs_btree_borrow_right;
1496                                 stats->bs_nblocks++;
1497                                 goto out;
1498                         } else {
1499                                 path[level].bp_sib_bh = bh;
1500                                 path[level].bp_op = nilfs_btree_concat_right;
1501                                 stats->bs_nblocks++;
1502                                 /*
1503                                  * When merging right sibling node
1504                                  * into the current node, pointer to
1505                                  * the right sibling node must be
1506                                  * terminated instead.  The adjustment
1507                                  * below is required for that.
1508                                  */
1509                                 dindex = pindex + 1;
1510                                 /* continue; */
1511                         }
1512                 } else {
1513                         /* no siblings */
1514                         /* the only child of the root node */
1515                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1516                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1517                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1518                                 path[level].bp_op = nilfs_btree_shrink;
1519                                 stats->bs_nblocks += 2;
1520                                 level++;
1521                                 path[level].bp_op = nilfs_btree_nop;
1522                                 goto shrink_root_child;
1523                         } else {
1524                                 path[level].bp_op = nilfs_btree_do_delete;
1525                                 stats->bs_nblocks++;
1526                                 goto out;
1527                         }
1528                 }
1529         }
1530
1531         /* child of the root node is deleted */
1532         path[level].bp_op = nilfs_btree_do_delete;
1533         stats->bs_nblocks++;
1534
1535 shrink_root_child:
1536         node = nilfs_btree_get_root(btree);
1537         path[level].bp_oldreq.bpr_ptr =
1538                 nilfs_btree_node_get_ptr(node, dindex,
1539                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1540
1541         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1542         if (ret < 0)
1543                 goto err_out_child_node;
1544
1545         /* success */
1546  out:
1547         *levelp = level;
1548         return ret;
1549
1550         /* error */
1551  err_out_curr_node:
1552         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1553  err_out_child_node:
1554         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1555                 brelse(path[level].bp_sib_bh);
1556                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1557         }
1558         *levelp = level;
1559         stats->bs_nblocks = 0;
1560         return ret;
1561 }
1562
1563 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1564                                       struct nilfs_btree_path *path,
1565                                       int maxlevel, struct inode *dat)
1566 {
1567         int level;
1568
1569         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1570                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1571                 path[level].bp_op(btree, path, level, NULL, NULL);
1572         }
1573
1574         if (!nilfs_bmap_dirty(btree))
1575                 nilfs_bmap_set_dirty(btree);
1576 }
1577
1578 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1579
1580 {
1581         struct nilfs_btree_path *path;
1582         struct nilfs_bmap_stats stats;
1583         struct inode *dat;
1584         int level, ret;
1585
1586         path = nilfs_btree_alloc_path();
1587         if (path == NULL)
1588                 return -ENOMEM;
1589
1590         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1591                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1592         if (ret < 0)
1593                 goto out;
1594
1595
1596         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1597
1598         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1599         if (ret < 0)
1600                 goto out;
1601         nilfs_btree_commit_delete(btree, path, level, dat);
1602         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1603
1604 out:
1605         nilfs_btree_free_path(path);
1606         return ret;
1607 }
1608
1609 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1610                                 __u64 *keyp)
1611 {
1612         struct nilfs_btree_path *path;
1613         const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1614         int ret;
1615
1616         path = nilfs_btree_alloc_path();
1617         if (!path)
1618                 return -ENOMEM;
1619
1620         ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1621         if (!ret)
1622                 *keyp = start;
1623         else if (ret == -ENOENT)
1624                 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1625
1626         nilfs_btree_free_path(path);
1627         return ret;
1628 }
1629
1630 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1631 {
1632         struct nilfs_btree_path *path;
1633         int ret;
1634
1635         path = nilfs_btree_alloc_path();
1636         if (path == NULL)
1637                 return -ENOMEM;
1638
1639         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1640
1641         nilfs_btree_free_path(path);
1642
1643         return ret;
1644 }
1645
1646 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1647 {
1648         struct buffer_head *bh;
1649         struct nilfs_btree_node *root, *node;
1650         __u64 maxkey, nextmaxkey;
1651         __u64 ptr;
1652         int nchildren, ret;
1653
1654         root = nilfs_btree_get_root(btree);
1655         switch (nilfs_btree_height(btree)) {
1656         case 2:
1657                 bh = NULL;
1658                 node = root;
1659                 break;
1660         case 3:
1661                 nchildren = nilfs_btree_node_get_nchildren(root);
1662                 if (nchildren > 1)
1663                         return 0;
1664                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1665                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1666                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1667                 if (ret < 0)
1668                         return ret;
1669                 node = (struct nilfs_btree_node *)bh->b_data;
1670                 break;
1671         default:
1672                 return 0;
1673         }
1674
1675         nchildren = nilfs_btree_node_get_nchildren(node);
1676         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1677         nextmaxkey = (nchildren > 1) ?
1678                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1679         if (bh != NULL)
1680                 brelse(bh);
1681
1682         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1683 }
1684
1685 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1686                                    __u64 *keys, __u64 *ptrs, int nitems)
1687 {
1688         struct buffer_head *bh;
1689         struct nilfs_btree_node *node, *root;
1690         __le64 *dkeys;
1691         __le64 *dptrs;
1692         __u64 ptr;
1693         int nchildren, ncmax, i, ret;
1694
1695         root = nilfs_btree_get_root(btree);
1696         switch (nilfs_btree_height(btree)) {
1697         case 2:
1698                 bh = NULL;
1699                 node = root;
1700                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1701                 break;
1702         case 3:
1703                 nchildren = nilfs_btree_node_get_nchildren(root);
1704                 WARN_ON(nchildren > 1);
1705                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1706                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1707                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1708                 if (ret < 0)
1709                         return ret;
1710                 node = (struct nilfs_btree_node *)bh->b_data;
1711                 ncmax = nilfs_btree_nchildren_per_block(btree);
1712                 break;
1713         default:
1714                 node = NULL;
1715                 return -EINVAL;
1716         }
1717
1718         nchildren = nilfs_btree_node_get_nchildren(node);
1719         if (nchildren < nitems)
1720                 nitems = nchildren;
1721         dkeys = nilfs_btree_node_dkeys(node);
1722         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1723         for (i = 0; i < nitems; i++) {
1724                 keys[i] = le64_to_cpu(dkeys[i]);
1725                 ptrs[i] = le64_to_cpu(dptrs[i]);
1726         }
1727
1728         if (bh != NULL)
1729                 brelse(bh);
1730
1731         return nitems;
1732 }
1733
1734 static int
1735 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1736                                        union nilfs_bmap_ptr_req *dreq,
1737                                        union nilfs_bmap_ptr_req *nreq,
1738                                        struct buffer_head **bhp,
1739                                        struct nilfs_bmap_stats *stats)
1740 {
1741         struct buffer_head *bh;
1742         struct inode *dat = NULL;
1743         int ret;
1744
1745         stats->bs_nblocks = 0;
1746
1747         /* for data */
1748         /* cannot find near ptr */
1749         if (NILFS_BMAP_USE_VBN(btree)) {
1750                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1751                 dat = nilfs_bmap_get_dat(btree);
1752         }
1753
1754         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1755         if (ret < 0)
1756                 return ret;
1757
1758         *bhp = NULL;
1759         stats->bs_nblocks++;
1760         if (nreq != NULL) {
1761                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1762                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1763                 if (ret < 0)
1764                         goto err_out_dreq;
1765
1766                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1767                 if (ret < 0)
1768                         goto err_out_nreq;
1769
1770                 *bhp = bh;
1771                 stats->bs_nblocks++;
1772         }
1773
1774         /* success */
1775         return 0;
1776
1777         /* error */
1778  err_out_nreq:
1779         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1780  err_out_dreq:
1781         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1782         stats->bs_nblocks = 0;
1783         return ret;
1784
1785 }
1786
1787 static void
1788 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1789                                       __u64 key, __u64 ptr,
1790                                       const __u64 *keys, const __u64 *ptrs,
1791                                       int n,
1792                                       union nilfs_bmap_ptr_req *dreq,
1793                                       union nilfs_bmap_ptr_req *nreq,
1794                                       struct buffer_head *bh)
1795 {
1796         struct nilfs_btree_node *node;
1797         struct inode *dat;
1798         __u64 tmpptr;
1799         int ncblk;
1800
1801         /* free resources */
1802         if (btree->b_ops->bop_clear != NULL)
1803                 btree->b_ops->bop_clear(btree);
1804
1805         /* ptr must be a pointer to a buffer head. */
1806         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1807
1808         /* convert and insert */
1809         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1810         __nilfs_btree_init(btree);
1811         if (nreq != NULL) {
1812                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1813                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1814
1815                 /* create child node at level 1 */
1816                 node = (struct nilfs_btree_node *)bh->b_data;
1817                 ncblk = nilfs_btree_nchildren_per_block(btree);
1818                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1819                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1820                 if (!buffer_dirty(bh))
1821                         mark_buffer_dirty(bh);
1822                 if (!nilfs_bmap_dirty(btree))
1823                         nilfs_bmap_set_dirty(btree);
1824
1825                 brelse(bh);
1826
1827                 /* create root node at level 2 */
1828                 node = nilfs_btree_get_root(btree);
1829                 tmpptr = nreq->bpr_ptr;
1830                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1831                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1832                                       &keys[0], &tmpptr);
1833         } else {
1834                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1835
1836                 /* create root node at level 1 */
1837                 node = nilfs_btree_get_root(btree);
1838                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1839                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1840                                       keys, ptrs);
1841                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1842                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1843                 if (!nilfs_bmap_dirty(btree))
1844                         nilfs_bmap_set_dirty(btree);
1845         }
1846
1847         if (NILFS_BMAP_USE_VBN(btree))
1848                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1849 }
1850
1851 /**
1852  * nilfs_btree_convert_and_insert -
1853  * @bmap:
1854  * @key:
1855  * @ptr:
1856  * @keys:
1857  * @ptrs:
1858  * @n:
1859  */
1860 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1861                                    __u64 key, __u64 ptr,
1862                                    const __u64 *keys, const __u64 *ptrs, int n)
1863 {
1864         struct buffer_head *bh = NULL;
1865         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1866         struct nilfs_bmap_stats stats;
1867         int ret;
1868
1869         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1870                 di = &dreq;
1871                 ni = NULL;
1872         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1873                            nilfs_btree_node_size(btree))) {
1874                 di = &dreq;
1875                 ni = &nreq;
1876         } else {
1877                 di = NULL;
1878                 ni = NULL;
1879                 BUG();
1880         }
1881
1882         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1883                                                      &stats);
1884         if (ret < 0)
1885                 return ret;
1886         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1887                                               di, ni, bh);
1888         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1889         return 0;
1890 }
1891
1892 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1893                                    struct nilfs_btree_path *path,
1894                                    int level,
1895                                    struct buffer_head *bh)
1896 {
1897         while ((++level < nilfs_btree_height(btree) - 1) &&
1898                !buffer_dirty(path[level].bp_bh))
1899                 mark_buffer_dirty(path[level].bp_bh);
1900
1901         return 0;
1902 }
1903
1904 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1905                                         struct nilfs_btree_path *path,
1906                                         int level, struct inode *dat)
1907 {
1908         struct nilfs_btree_node *parent;
1909         int ncmax, ret;
1910
1911         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1912         path[level].bp_oldreq.bpr_ptr =
1913                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1914                                          ncmax);
1915         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1916         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1917                                        &path[level].bp_newreq.bpr_req);
1918         if (ret < 0)
1919                 return ret;
1920
1921         if (buffer_nilfs_node(path[level].bp_bh)) {
1922                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1923                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1924                 path[level].bp_ctxt.bh = path[level].bp_bh;
1925                 ret = nilfs_btnode_prepare_change_key(
1926                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1927                         &path[level].bp_ctxt);
1928                 if (ret < 0) {
1929                         nilfs_dat_abort_update(dat,
1930                                                &path[level].bp_oldreq.bpr_req,
1931                                                &path[level].bp_newreq.bpr_req);
1932                         return ret;
1933                 }
1934         }
1935
1936         return 0;
1937 }
1938
1939 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1940                                         struct nilfs_btree_path *path,
1941                                         int level, struct inode *dat)
1942 {
1943         struct nilfs_btree_node *parent;
1944         int ncmax;
1945
1946         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1947                                 &path[level].bp_newreq.bpr_req,
1948                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1949
1950         if (buffer_nilfs_node(path[level].bp_bh)) {
1951                 nilfs_btnode_commit_change_key(
1952                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1953                         &path[level].bp_ctxt);
1954                 path[level].bp_bh = path[level].bp_ctxt.bh;
1955         }
1956         set_buffer_nilfs_volatile(path[level].bp_bh);
1957
1958         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1959         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1960                                  path[level].bp_newreq.bpr_ptr, ncmax);
1961 }
1962
1963 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1964                                        struct nilfs_btree_path *path,
1965                                        int level, struct inode *dat)
1966 {
1967         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1968                                &path[level].bp_newreq.bpr_req);
1969         if (buffer_nilfs_node(path[level].bp_bh))
1970                 nilfs_btnode_abort_change_key(
1971                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1972                         &path[level].bp_ctxt);
1973 }
1974
1975 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1976                                            struct nilfs_btree_path *path,
1977                                            int minlevel, int *maxlevelp,
1978                                            struct inode *dat)
1979 {
1980         int level, ret;
1981
1982         level = minlevel;
1983         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1984                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1985                 if (ret < 0)
1986                         return ret;
1987         }
1988         while ((++level < nilfs_btree_height(btree) - 1) &&
1989                !buffer_dirty(path[level].bp_bh)) {
1990
1991                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1992                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1993                 if (ret < 0)
1994                         goto out;
1995         }
1996
1997         /* success */
1998         *maxlevelp = level - 1;
1999         return 0;
2000
2001         /* error */
2002  out:
2003         while (--level > minlevel)
2004                 nilfs_btree_abort_update_v(btree, path, level, dat);
2005         if (!buffer_nilfs_volatile(path[level].bp_bh))
2006                 nilfs_btree_abort_update_v(btree, path, level, dat);
2007         return ret;
2008 }
2009
2010 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2011                                            struct nilfs_btree_path *path,
2012                                            int minlevel, int maxlevel,
2013                                            struct buffer_head *bh,
2014                                            struct inode *dat)
2015 {
2016         int level;
2017
2018         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2019                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2020
2021         for (level = minlevel + 1; level <= maxlevel; level++)
2022                 nilfs_btree_commit_update_v(btree, path, level, dat);
2023 }
2024
2025 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2026                                    struct nilfs_btree_path *path,
2027                                    int level, struct buffer_head *bh)
2028 {
2029         int maxlevel = 0, ret;
2030         struct nilfs_btree_node *parent;
2031         struct inode *dat = nilfs_bmap_get_dat(btree);
2032         __u64 ptr;
2033         int ncmax;
2034
2035         get_bh(bh);
2036         path[level].bp_bh = bh;
2037         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2038                                               dat);
2039         if (ret < 0)
2040                 goto out;
2041
2042         if (buffer_nilfs_volatile(path[level].bp_bh)) {
2043                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2044                 ptr = nilfs_btree_node_get_ptr(parent,
2045                                                path[level + 1].bp_index,
2046                                                ncmax);
2047                 ret = nilfs_dat_mark_dirty(dat, ptr);
2048                 if (ret < 0)
2049                         goto out;
2050         }
2051
2052         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2053
2054  out:
2055         brelse(path[level].bp_bh);
2056         path[level].bp_bh = NULL;
2057         return ret;
2058 }
2059
2060 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2061                                  struct buffer_head *bh)
2062 {
2063         struct nilfs_btree_path *path;
2064         struct nilfs_btree_node *node;
2065         __u64 key;
2066         int level, ret;
2067
2068         WARN_ON(!buffer_dirty(bh));
2069
2070         path = nilfs_btree_alloc_path();
2071         if (path == NULL)
2072                 return -ENOMEM;
2073
2074         if (buffer_nilfs_node(bh)) {
2075                 node = (struct nilfs_btree_node *)bh->b_data;
2076                 key = nilfs_btree_node_get_key(node, 0);
2077                 level = nilfs_btree_node_get_level(node);
2078         } else {
2079                 key = nilfs_bmap_data_get_key(btree, bh);
2080                 level = NILFS_BTREE_LEVEL_DATA;
2081         }
2082
2083         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2084         if (ret < 0) {
2085                 if (unlikely(ret == -ENOENT))
2086                         nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
2087                                   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2088                                   btree->b_inode->i_ino,
2089                                   (unsigned long long)key, level);
2090                 goto out;
2091         }
2092
2093         ret = NILFS_BMAP_USE_VBN(btree) ?
2094                 nilfs_btree_propagate_v(btree, path, level, bh) :
2095                 nilfs_btree_propagate_p(btree, path, level, bh);
2096
2097  out:
2098         nilfs_btree_free_path(path);
2099
2100         return ret;
2101 }
2102
2103 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2104                                     struct buffer_head *bh)
2105 {
2106         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2107 }
2108
2109 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2110                                          struct list_head *lists,
2111                                          struct buffer_head *bh)
2112 {
2113         struct list_head *head;
2114         struct buffer_head *cbh;
2115         struct nilfs_btree_node *node, *cnode;
2116         __u64 key, ckey;
2117         int level;
2118
2119         get_bh(bh);
2120         node = (struct nilfs_btree_node *)bh->b_data;
2121         key = nilfs_btree_node_get_key(node, 0);
2122         level = nilfs_btree_node_get_level(node);
2123         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2124             level >= NILFS_BTREE_LEVEL_MAX) {
2125                 dump_stack();
2126                 nilfs_msg(btree->b_inode->i_sb, KERN_WARNING,
2127                           "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2128                           level, (unsigned long long)key,
2129                           btree->b_inode->i_ino,
2130                           (unsigned long long)bh->b_blocknr);
2131                 return;
2132         }
2133
2134         list_for_each(head, &lists[level]) {
2135                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2136                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2137                 ckey = nilfs_btree_node_get_key(cnode, 0);
2138                 if (key < ckey)
2139                         break;
2140         }
2141         list_add_tail(&bh->b_assoc_buffers, head);
2142 }
2143
2144 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2145                                              struct list_head *listp)
2146 {
2147         struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2148         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2149         struct pagevec pvec;
2150         struct buffer_head *bh, *head;
2151         pgoff_t index = 0;
2152         int level, i;
2153
2154         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2155              level < NILFS_BTREE_LEVEL_MAX;
2156              level++)
2157                 INIT_LIST_HEAD(&lists[level]);
2158
2159         pagevec_init(&pvec, 0);
2160
2161         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2162                                   PAGEVEC_SIZE)) {
2163                 for (i = 0; i < pagevec_count(&pvec); i++) {
2164                         bh = head = page_buffers(pvec.pages[i]);
2165                         do {
2166                                 if (buffer_dirty(bh))
2167                                         nilfs_btree_add_dirty_buffer(btree,
2168                                                                      lists, bh);
2169                         } while ((bh = bh->b_this_page) != head);
2170                 }
2171                 pagevec_release(&pvec);
2172                 cond_resched();
2173         }
2174
2175         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2176              level < NILFS_BTREE_LEVEL_MAX;
2177              level++)
2178                 list_splice_tail(&lists[level], listp);
2179 }
2180
2181 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2182                                 struct nilfs_btree_path *path,
2183                                 int level,
2184                                 struct buffer_head **bh,
2185                                 sector_t blocknr,
2186                                 union nilfs_binfo *binfo)
2187 {
2188         struct nilfs_btree_node *parent;
2189         __u64 key;
2190         __u64 ptr;
2191         int ncmax, ret;
2192
2193         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2194         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2195                                        ncmax);
2196         if (buffer_nilfs_node(*bh)) {
2197                 path[level].bp_ctxt.oldkey = ptr;
2198                 path[level].bp_ctxt.newkey = blocknr;
2199                 path[level].bp_ctxt.bh = *bh;
2200                 ret = nilfs_btnode_prepare_change_key(
2201                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2202                         &path[level].bp_ctxt);
2203                 if (ret < 0)
2204                         return ret;
2205                 nilfs_btnode_commit_change_key(
2206                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2207                         &path[level].bp_ctxt);
2208                 *bh = path[level].bp_ctxt.bh;
2209         }
2210
2211         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2212                                  ncmax);
2213
2214         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2215         /* on-disk format */
2216         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2217         binfo->bi_dat.bi_level = level;
2218
2219         return 0;
2220 }
2221
2222 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2223                                 struct nilfs_btree_path *path,
2224                                 int level,
2225                                 struct buffer_head **bh,
2226                                 sector_t blocknr,
2227                                 union nilfs_binfo *binfo)
2228 {
2229         struct nilfs_btree_node *parent;
2230         struct inode *dat = nilfs_bmap_get_dat(btree);
2231         __u64 key;
2232         __u64 ptr;
2233         union nilfs_bmap_ptr_req req;
2234         int ncmax, ret;
2235
2236         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2237         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2238                                        ncmax);
2239         req.bpr_ptr = ptr;
2240         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2241         if (ret < 0)
2242                 return ret;
2243         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2244
2245         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2246         /* on-disk format */
2247         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2248         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2249
2250         return 0;
2251 }
2252
2253 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2254                               struct buffer_head **bh,
2255                               sector_t blocknr,
2256                               union nilfs_binfo *binfo)
2257 {
2258         struct nilfs_btree_path *path;
2259         struct nilfs_btree_node *node;
2260         __u64 key;
2261         int level, ret;
2262
2263         path = nilfs_btree_alloc_path();
2264         if (path == NULL)
2265                 return -ENOMEM;
2266
2267         if (buffer_nilfs_node(*bh)) {
2268                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2269                 key = nilfs_btree_node_get_key(node, 0);
2270                 level = nilfs_btree_node_get_level(node);
2271         } else {
2272                 key = nilfs_bmap_data_get_key(btree, *bh);
2273                 level = NILFS_BTREE_LEVEL_DATA;
2274         }
2275
2276         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2277         if (ret < 0) {
2278                 WARN_ON(ret == -ENOENT);
2279                 goto out;
2280         }
2281
2282         ret = NILFS_BMAP_USE_VBN(btree) ?
2283                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2284                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2285
2286  out:
2287         nilfs_btree_free_path(path);
2288
2289         return ret;
2290 }
2291
2292 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2293                                  struct buffer_head **bh,
2294                                  sector_t blocknr,
2295                                  union nilfs_binfo *binfo)
2296 {
2297         struct nilfs_btree_node *node;
2298         __u64 key;
2299         int ret;
2300
2301         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2302                              blocknr);
2303         if (ret < 0)
2304                 return ret;
2305
2306         if (buffer_nilfs_node(*bh)) {
2307                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2308                 key = nilfs_btree_node_get_key(node, 0);
2309         } else
2310                 key = nilfs_bmap_data_get_key(btree, *bh);
2311
2312         /* on-disk format */
2313         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2314         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2315
2316         return 0;
2317 }
2318
2319 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2320 {
2321         struct buffer_head *bh;
2322         struct nilfs_btree_path *path;
2323         __u64 ptr;
2324         int ret;
2325
2326         path = nilfs_btree_alloc_path();
2327         if (path == NULL)
2328                 return -ENOMEM;
2329
2330         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2331         if (ret < 0) {
2332                 WARN_ON(ret == -ENOENT);
2333                 goto out;
2334         }
2335         ret = nilfs_btree_get_block(btree, ptr, &bh);
2336         if (ret < 0) {
2337                 WARN_ON(ret == -ENOENT);
2338                 goto out;
2339         }
2340
2341         if (!buffer_dirty(bh))
2342                 mark_buffer_dirty(bh);
2343         brelse(bh);
2344         if (!nilfs_bmap_dirty(btree))
2345                 nilfs_bmap_set_dirty(btree);
2346
2347  out:
2348         nilfs_btree_free_path(path);
2349         return ret;
2350 }
2351
2352 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2353         .bop_lookup             =       nilfs_btree_lookup,
2354         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2355         .bop_insert             =       nilfs_btree_insert,
2356         .bop_delete             =       nilfs_btree_delete,
2357         .bop_clear              =       NULL,
2358
2359         .bop_propagate          =       nilfs_btree_propagate,
2360
2361         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2362
2363         .bop_assign             =       nilfs_btree_assign,
2364         .bop_mark               =       nilfs_btree_mark,
2365
2366         .bop_seek_key           =       nilfs_btree_seek_key,
2367         .bop_last_key           =       nilfs_btree_last_key,
2368
2369         .bop_check_insert       =       NULL,
2370         .bop_check_delete       =       nilfs_btree_check_delete,
2371         .bop_gather_data        =       nilfs_btree_gather_data,
2372 };
2373
2374 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2375         .bop_lookup             =       NULL,
2376         .bop_lookup_contig      =       NULL,
2377         .bop_insert             =       NULL,
2378         .bop_delete             =       NULL,
2379         .bop_clear              =       NULL,
2380
2381         .bop_propagate          =       nilfs_btree_propagate_gc,
2382
2383         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2384
2385         .bop_assign             =       nilfs_btree_assign_gc,
2386         .bop_mark               =       NULL,
2387
2388         .bop_seek_key           =       NULL,
2389         .bop_last_key           =       NULL,
2390
2391         .bop_check_insert       =       NULL,
2392         .bop_check_delete       =       NULL,
2393         .bop_gather_data        =       NULL,
2394 };
2395
2396 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2397 {
2398         bmap->b_ops = &nilfs_btree_ops;
2399         bmap->b_nchildren_per_block =
2400                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2401 }
2402
2403 int nilfs_btree_init(struct nilfs_bmap *bmap)
2404 {
2405         int ret = 0;
2406
2407         __nilfs_btree_init(bmap);
2408
2409         if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2410                 ret = -EIO;
2411         return ret;
2412 }
2413
2414 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2415 {
2416         bmap->b_ops = &nilfs_btree_ops_gc;
2417         bmap->b_nchildren_per_block =
2418                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2419 }