]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/xfs/xfs_dir2_leaf.c
Merge tag 'for-linus-v3.9-rc4' of git://oss.sgi.com/xfs/xfs
[karo-tx-linux.git] / fs / xfs / xfs_dir2_leaf.c
1 /*
2  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_mount.h"
27 #include "xfs_da_btree.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_dinode.h"
30 #include "xfs_inode.h"
31 #include "xfs_bmap.h"
32 #include "xfs_dir2_format.h"
33 #include "xfs_dir2_priv.h"
34 #include "xfs_error.h"
35 #include "xfs_trace.h"
36
37 /*
38  * Local function declarations.
39  */
40 #ifdef DEBUG
41 static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp);
42 #else
43 #define xfs_dir2_leaf_check(dp, bp)
44 #endif
45 static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
46                                     int *indexp, struct xfs_buf **dbpp);
47 static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
48                                     int first, int last);
49 static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
50
51 static void
52 xfs_dir2_leaf_verify(
53         struct xfs_buf          *bp,
54         __be16                  magic)
55 {
56         struct xfs_mount        *mp = bp->b_target->bt_mount;
57         struct xfs_dir2_leaf_hdr *hdr = bp->b_addr;
58         int                     block_ok = 0;
59
60         block_ok = hdr->info.magic == magic;
61         if (!block_ok) {
62                 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
63                 xfs_buf_ioerror(bp, EFSCORRUPTED);
64         }
65 }
66
67 static void
68 xfs_dir2_leaf1_read_verify(
69         struct xfs_buf  *bp)
70 {
71         xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
72 }
73
74 static void
75 xfs_dir2_leaf1_write_verify(
76         struct xfs_buf  *bp)
77 {
78         xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
79 }
80
81 void
82 xfs_dir2_leafn_read_verify(
83         struct xfs_buf  *bp)
84 {
85         xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
86 }
87
88 void
89 xfs_dir2_leafn_write_verify(
90         struct xfs_buf  *bp)
91 {
92         xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
93 }
94
95 static const struct xfs_buf_ops xfs_dir2_leaf1_buf_ops = {
96         .verify_read = xfs_dir2_leaf1_read_verify,
97         .verify_write = xfs_dir2_leaf1_write_verify,
98 };
99
100 const struct xfs_buf_ops xfs_dir2_leafn_buf_ops = {
101         .verify_read = xfs_dir2_leafn_read_verify,
102         .verify_write = xfs_dir2_leafn_write_verify,
103 };
104
105 static int
106 xfs_dir2_leaf_read(
107         struct xfs_trans        *tp,
108         struct xfs_inode        *dp,
109         xfs_dablk_t             fbno,
110         xfs_daddr_t             mappedbno,
111         struct xfs_buf          **bpp)
112 {
113         return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
114                                 XFS_DATA_FORK, &xfs_dir2_leaf1_buf_ops);
115 }
116
117 int
118 xfs_dir2_leafn_read(
119         struct xfs_trans        *tp,
120         struct xfs_inode        *dp,
121         xfs_dablk_t             fbno,
122         xfs_daddr_t             mappedbno,
123         struct xfs_buf          **bpp)
124 {
125         return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
126                                 XFS_DATA_FORK, &xfs_dir2_leafn_buf_ops);
127 }
128
129 /*
130  * Convert a block form directory to a leaf form directory.
131  */
132 int                                             /* error */
133 xfs_dir2_block_to_leaf(
134         xfs_da_args_t           *args,          /* operation arguments */
135         struct xfs_buf          *dbp)           /* input block's buffer */
136 {
137         __be16                  *bestsp;        /* leaf's bestsp entries */
138         xfs_dablk_t             blkno;          /* leaf block's bno */
139         xfs_dir2_data_hdr_t     *hdr;           /* block header */
140         xfs_dir2_leaf_entry_t   *blp;           /* block's leaf entries */
141         xfs_dir2_block_tail_t   *btp;           /* block's tail */
142         xfs_inode_t             *dp;            /* incore directory inode */
143         int                     error;          /* error return code */
144         struct xfs_buf          *lbp;           /* leaf block's buffer */
145         xfs_dir2_db_t           ldb;            /* leaf block's bno */
146         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
147         xfs_dir2_leaf_tail_t    *ltp;           /* leaf's tail */
148         xfs_mount_t             *mp;            /* filesystem mount point */
149         int                     needlog;        /* need to log block header */
150         int                     needscan;       /* need to rescan bestfree */
151         xfs_trans_t             *tp;            /* transaction pointer */
152
153         trace_xfs_dir2_block_to_leaf(args);
154
155         dp = args->dp;
156         mp = dp->i_mount;
157         tp = args->trans;
158         /*
159          * Add the leaf block to the inode.
160          * This interface will only put blocks in the leaf/node range.
161          * Since that's empty now, we'll get the root (block 0 in range).
162          */
163         if ((error = xfs_da_grow_inode(args, &blkno))) {
164                 return error;
165         }
166         ldb = xfs_dir2_da_to_db(mp, blkno);
167         ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
168         /*
169          * Initialize the leaf block, get a buffer for it.
170          */
171         if ((error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC))) {
172                 return error;
173         }
174         ASSERT(lbp != NULL);
175         leaf = lbp->b_addr;
176         hdr = dbp->b_addr;
177         xfs_dir2_data_check(dp, dbp);
178         btp = xfs_dir2_block_tail_p(mp, hdr);
179         blp = xfs_dir2_block_leaf_p(btp);
180         /*
181          * Set the counts in the leaf header.
182          */
183         leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count));
184         leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale));
185         /*
186          * Could compact these but I think we always do the conversion
187          * after squeezing out stale entries.
188          */
189         memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
190         xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1);
191         needscan = 0;
192         needlog = 1;
193         /*
194          * Make the space formerly occupied by the leaf entries and block
195          * tail be free.
196          */
197         xfs_dir2_data_make_free(tp, dbp,
198                 (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
199                 (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
200                                        (char *)blp),
201                 &needlog, &needscan);
202         /*
203          * Fix up the block header, make it a data block.
204          */
205         dbp->b_ops = &xfs_dir2_data_buf_ops;
206         hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
207         if (needscan)
208                 xfs_dir2_data_freescan(mp, hdr, &needlog);
209         /*
210          * Set up leaf tail and bests table.
211          */
212         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
213         ltp->bestcount = cpu_to_be32(1);
214         bestsp = xfs_dir2_leaf_bests_p(ltp);
215         bestsp[0] =  hdr->bestfree[0].length;
216         /*
217          * Log the data header and leaf bests table.
218          */
219         if (needlog)
220                 xfs_dir2_data_log_header(tp, dbp);
221         xfs_dir2_leaf_check(dp, lbp);
222         xfs_dir2_data_check(dp, dbp);
223         xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
224         return 0;
225 }
226
227 STATIC void
228 xfs_dir2_leaf_find_stale(
229         struct xfs_dir2_leaf    *leaf,
230         int                     index,
231         int                     *lowstale,
232         int                     *highstale)
233 {
234         /*
235          * Find the first stale entry before our index, if any.
236          */
237         for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
238                 if (leaf->ents[*lowstale].address ==
239                     cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
240                         break;
241         }
242
243         /*
244          * Find the first stale entry at or after our index, if any.
245          * Stop if the result would require moving more entries than using
246          * lowstale.
247          */
248         for (*highstale = index;
249              *highstale < be16_to_cpu(leaf->hdr.count);
250              ++*highstale) {
251                 if (leaf->ents[*highstale].address ==
252                     cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
253                         break;
254                 if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
255                         break;
256         }
257 }
258
259 struct xfs_dir2_leaf_entry *
260 xfs_dir2_leaf_find_entry(
261         xfs_dir2_leaf_t         *leaf,          /* leaf structure */
262         int                     index,          /* leaf table position */
263         int                     compact,        /* need to compact leaves */
264         int                     lowstale,       /* index of prev stale leaf */
265         int                     highstale,      /* index of next stale leaf */
266         int                     *lfloglow,      /* low leaf logging index */
267         int                     *lfloghigh)     /* high leaf logging index */
268 {
269         if (!leaf->hdr.stale) {
270                 xfs_dir2_leaf_entry_t   *lep;   /* leaf entry table pointer */
271
272                 /*
273                  * Now we need to make room to insert the leaf entry.
274                  *
275                  * If there are no stale entries, just insert a hole at index.
276                  */
277                 lep = &leaf->ents[index];
278                 if (index < be16_to_cpu(leaf->hdr.count))
279                         memmove(lep + 1, lep,
280                                 (be16_to_cpu(leaf->hdr.count) - index) *
281                                  sizeof(*lep));
282
283                 /*
284                  * Record low and high logging indices for the leaf.
285                  */
286                 *lfloglow = index;
287                 *lfloghigh = be16_to_cpu(leaf->hdr.count);
288                 be16_add_cpu(&leaf->hdr.count, 1);
289                 return lep;
290         }
291
292         /*
293          * There are stale entries.
294          *
295          * We will use one of them for the new entry.  It's probably not at
296          * the right location, so we'll have to shift some up or down first.
297          *
298          * If we didn't compact before, we need to find the nearest stale
299          * entries before and after our insertion point.
300          */
301         if (compact == 0)
302                 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
303
304         /*
305          * If the low one is better, use it.
306          */
307         if (lowstale >= 0 &&
308             (highstale == be16_to_cpu(leaf->hdr.count) ||
309              index - lowstale - 1 < highstale - index)) {
310                 ASSERT(index - lowstale - 1 >= 0);
311                 ASSERT(leaf->ents[lowstale].address ==
312                        cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
313
314                 /*
315                  * Copy entries up to cover the stale entry and make room
316                  * for the new entry.
317                  */
318                 if (index - lowstale - 1 > 0) {
319                         memmove(&leaf->ents[lowstale],
320                                 &leaf->ents[lowstale + 1],
321                                 (index - lowstale - 1) *
322                                 sizeof(xfs_dir2_leaf_entry_t));
323                 }
324                 *lfloglow = MIN(lowstale, *lfloglow);
325                 *lfloghigh = MAX(index - 1, *lfloghigh);
326                 be16_add_cpu(&leaf->hdr.stale, -1);
327                 return &leaf->ents[index - 1];
328         }
329
330         /*
331          * The high one is better, so use that one.
332          */
333         ASSERT(highstale - index >= 0);
334         ASSERT(leaf->ents[highstale].address ==
335                cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
336
337         /*
338          * Copy entries down to cover the stale entry and make room for the
339          * new entry.
340          */
341         if (highstale - index > 0) {
342                 memmove(&leaf->ents[index + 1],
343                         &leaf->ents[index],
344                         (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
345         }
346         *lfloglow = MIN(index, *lfloglow);
347         *lfloghigh = MAX(highstale, *lfloghigh);
348         be16_add_cpu(&leaf->hdr.stale, -1);
349         return &leaf->ents[index];
350 }
351
352 /*
353  * Add an entry to a leaf form directory.
354  */
355 int                                             /* error */
356 xfs_dir2_leaf_addname(
357         xfs_da_args_t           *args)          /* operation arguments */
358 {
359         __be16                  *bestsp;        /* freespace table in leaf */
360         int                     compact;        /* need to compact leaves */
361         xfs_dir2_data_hdr_t     *hdr;           /* data block header */
362         struct xfs_buf          *dbp;           /* data block buffer */
363         xfs_dir2_data_entry_t   *dep;           /* data block entry */
364         xfs_inode_t             *dp;            /* incore directory inode */
365         xfs_dir2_data_unused_t  *dup;           /* data unused entry */
366         int                     error;          /* error return value */
367         int                     grown;          /* allocated new data block */
368         int                     highstale;      /* index of next stale leaf */
369         int                     i;              /* temporary, index */
370         int                     index;          /* leaf table position */
371         struct xfs_buf          *lbp;           /* leaf's buffer */
372         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
373         int                     length;         /* length of new entry */
374         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry table pointer */
375         int                     lfloglow;       /* low leaf logging index */
376         int                     lfloghigh;      /* high leaf logging index */
377         int                     lowstale;       /* index of prev stale leaf */
378         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail pointer */
379         xfs_mount_t             *mp;            /* filesystem mount point */
380         int                     needbytes;      /* leaf block bytes needed */
381         int                     needlog;        /* need to log data header */
382         int                     needscan;       /* need to rescan data free */
383         __be16                  *tagp;          /* end of data entry */
384         xfs_trans_t             *tp;            /* transaction pointer */
385         xfs_dir2_db_t           use_block;      /* data block number */
386
387         trace_xfs_dir2_leaf_addname(args);
388
389         dp = args->dp;
390         tp = args->trans;
391         mp = dp->i_mount;
392
393         error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
394         if (error)
395                 return error;
396
397         /*
398          * Look up the entry by hash value and name.
399          * We know it's not there, our caller has already done a lookup.
400          * So the index is of the entry to insert in front of.
401          * But if there are dup hash values the index is of the first of those.
402          */
403         index = xfs_dir2_leaf_search_hash(args, lbp);
404         leaf = lbp->b_addr;
405         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
406         bestsp = xfs_dir2_leaf_bests_p(ltp);
407         length = xfs_dir2_data_entsize(args->namelen);
408         /*
409          * See if there are any entries with the same hash value
410          * and space in their block for the new entry.
411          * This is good because it puts multiple same-hash value entries
412          * in a data block, improving the lookup of those entries.
413          */
414         for (use_block = -1, lep = &leaf->ents[index];
415              index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
416              index++, lep++) {
417                 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
418                         continue;
419                 i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
420                 ASSERT(i < be32_to_cpu(ltp->bestcount));
421                 ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
422                 if (be16_to_cpu(bestsp[i]) >= length) {
423                         use_block = i;
424                         break;
425                 }
426         }
427         /*
428          * Didn't find a block yet, linear search all the data blocks.
429          */
430         if (use_block == -1) {
431                 for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
432                         /*
433                          * Remember a block we see that's missing.
434                          */
435                         if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
436                             use_block == -1)
437                                 use_block = i;
438                         else if (be16_to_cpu(bestsp[i]) >= length) {
439                                 use_block = i;
440                                 break;
441                         }
442                 }
443         }
444         /*
445          * How many bytes do we need in the leaf block?
446          */
447         needbytes = 0;
448         if (!leaf->hdr.stale)
449                 needbytes += sizeof(xfs_dir2_leaf_entry_t);
450         if (use_block == -1)
451                 needbytes += sizeof(xfs_dir2_data_off_t);
452
453         /*
454          * Now kill use_block if it refers to a missing block, so we
455          * can use it as an indication of allocation needed.
456          */
457         if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
458                 use_block = -1;
459         /*
460          * If we don't have enough free bytes but we can make enough
461          * by compacting out stale entries, we'll do that.
462          */
463         if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
464                                 needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
465                 compact = 1;
466         }
467         /*
468          * Otherwise if we don't have enough free bytes we need to
469          * convert to node form.
470          */
471         else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
472                                                 leaf->hdr.count)] < needbytes) {
473                 /*
474                  * Just checking or no space reservation, give up.
475                  */
476                 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
477                                                         args->total == 0) {
478                         xfs_trans_brelse(tp, lbp);
479                         return XFS_ERROR(ENOSPC);
480                 }
481                 /*
482                  * Convert to node form.
483                  */
484                 error = xfs_dir2_leaf_to_node(args, lbp);
485                 if (error)
486                         return error;
487                 /*
488                  * Then add the new entry.
489                  */
490                 return xfs_dir2_node_addname(args);
491         }
492         /*
493          * Otherwise it will fit without compaction.
494          */
495         else
496                 compact = 0;
497         /*
498          * If just checking, then it will fit unless we needed to allocate
499          * a new data block.
500          */
501         if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
502                 xfs_trans_brelse(tp, lbp);
503                 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
504         }
505         /*
506          * If no allocations are allowed, return now before we've
507          * changed anything.
508          */
509         if (args->total == 0 && use_block == -1) {
510                 xfs_trans_brelse(tp, lbp);
511                 return XFS_ERROR(ENOSPC);
512         }
513         /*
514          * Need to compact the leaf entries, removing stale ones.
515          * Leave one stale entry behind - the one closest to our
516          * insertion index - and we'll shift that one to our insertion
517          * point later.
518          */
519         if (compact) {
520                 xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale,
521                         &lfloglow, &lfloghigh);
522         }
523         /*
524          * There are stale entries, so we'll need log-low and log-high
525          * impossibly bad values later.
526          */
527         else if (be16_to_cpu(leaf->hdr.stale)) {
528                 lfloglow = be16_to_cpu(leaf->hdr.count);
529                 lfloghigh = -1;
530         }
531         /*
532          * If there was no data block space found, we need to allocate
533          * a new one.
534          */
535         if (use_block == -1) {
536                 /*
537                  * Add the new data block.
538                  */
539                 if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
540                                 &use_block))) {
541                         xfs_trans_brelse(tp, lbp);
542                         return error;
543                 }
544                 /*
545                  * Initialize the block.
546                  */
547                 if ((error = xfs_dir2_data_init(args, use_block, &dbp))) {
548                         xfs_trans_brelse(tp, lbp);
549                         return error;
550                 }
551                 /*
552                  * If we're adding a new data block on the end we need to
553                  * extend the bests table.  Copy it up one entry.
554                  */
555                 if (use_block >= be32_to_cpu(ltp->bestcount)) {
556                         bestsp--;
557                         memmove(&bestsp[0], &bestsp[1],
558                                 be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
559                         be32_add_cpu(&ltp->bestcount, 1);
560                         xfs_dir2_leaf_log_tail(tp, lbp);
561                         xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
562                 }
563                 /*
564                  * If we're filling in a previously empty block just log it.
565                  */
566                 else
567                         xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
568                 hdr = dbp->b_addr;
569                 bestsp[use_block] = hdr->bestfree[0].length;
570                 grown = 1;
571         } else {
572                 /*
573                  * Already had space in some data block.
574                  * Just read that one in.
575                  */
576                 error = xfs_dir2_data_read(tp, dp,
577                                            xfs_dir2_db_to_da(mp, use_block),
578                                            -1, &dbp);
579                 if (error) {
580                         xfs_trans_brelse(tp, lbp);
581                         return error;
582                 }
583                 hdr = dbp->b_addr;
584                 grown = 0;
585         }
586         /*
587          * Point to the biggest freespace in our data block.
588          */
589         dup = (xfs_dir2_data_unused_t *)
590               ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
591         ASSERT(be16_to_cpu(dup->length) >= length);
592         needscan = needlog = 0;
593         /*
594          * Mark the initial part of our freespace in use for the new entry.
595          */
596         xfs_dir2_data_use_free(tp, dbp, dup,
597                 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
598                 &needlog, &needscan);
599         /*
600          * Initialize our new entry (at last).
601          */
602         dep = (xfs_dir2_data_entry_t *)dup;
603         dep->inumber = cpu_to_be64(args->inumber);
604         dep->namelen = args->namelen;
605         memcpy(dep->name, args->name, dep->namelen);
606         tagp = xfs_dir2_data_entry_tag_p(dep);
607         *tagp = cpu_to_be16((char *)dep - (char *)hdr);
608         /*
609          * Need to scan fix up the bestfree table.
610          */
611         if (needscan)
612                 xfs_dir2_data_freescan(mp, hdr, &needlog);
613         /*
614          * Need to log the data block's header.
615          */
616         if (needlog)
617                 xfs_dir2_data_log_header(tp, dbp);
618         xfs_dir2_data_log_entry(tp, dbp, dep);
619         /*
620          * If the bests table needs to be changed, do it.
621          * Log the change unless we've already done that.
622          */
623         if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) {
624                 bestsp[use_block] = hdr->bestfree[0].length;
625                 if (!grown)
626                         xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
627         }
628
629         lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
630                                        highstale, &lfloglow, &lfloghigh);
631
632         /*
633          * Fill in the new leaf entry.
634          */
635         lep->hashval = cpu_to_be32(args->hashval);
636         lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
637                                 be16_to_cpu(*tagp)));
638         /*
639          * Log the leaf fields and give up the buffers.
640          */
641         xfs_dir2_leaf_log_header(tp, lbp);
642         xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
643         xfs_dir2_leaf_check(dp, lbp);
644         xfs_dir2_data_check(dp, dbp);
645         return 0;
646 }
647
#ifdef DEBUG
/*
 * Sanity-check a leaf1 block (DEBUG builds only).
 *
 * Asserts that the magic number is LEAF1, that the entry count is within
 * the per-block maximum, that the entry table does not run into the bests
 * table, that hash values are in non-decreasing order, and that the
 * header's stale count matches the number of stale entries found.
 */
STATIC void
xfs_dir2_leaf_check(
	struct xfs_inode	*dp,		/* incore directory inode */
	struct xfs_buf		*bp)		/* leaf's buffer */
{
	xfs_dir2_leaf_t		*leaf = bp->b_addr;	/* leaf structure */
	xfs_mount_t		*mp = dp->i_mount;	/* mount point */
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
	int			nstale = 0;	/* stale entries seen */
	int			idx;		/* leaf index */

	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));

	/*
	 * This bound is looser than it could be: it ignores the room taken
	 * by the bests table, whose size could be derived from di_size.
	 */
	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));

	/*
	 * The entry table must end at or before the start of the bests.
	 */
	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
	ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
	       (char *)xfs_dir2_leaf_bests_p(ltp));

	/*
	 * Verify hash ordering against the following entry and tally the
	 * stale entries as we go.
	 */
	for (idx = 0; idx < be16_to_cpu(leaf->hdr.count); idx++) {
		if (idx + 1 < be16_to_cpu(leaf->hdr.count))
			ASSERT(be32_to_cpu(leaf->ents[idx].hashval) <=
			       be32_to_cpu(leaf->ents[idx + 1].hashval));
		if (leaf->ents[idx].address ==
		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
			nstale++;
	}
	ASSERT(be16_to_cpu(leaf->hdr.stale) == nstale);
}
#endif	/* DEBUG */
692
693 /*
694  * Compact out any stale entries in the leaf.
695  * Log the header and changed leaf entries, if any.
696  */
697 void
698 xfs_dir2_leaf_compact(
699         xfs_da_args_t   *args,          /* operation arguments */
700         struct xfs_buf  *bp)            /* leaf buffer */
701 {
702         int             from;           /* source leaf index */
703         xfs_dir2_leaf_t *leaf;          /* leaf structure */
704         int             loglow;         /* first leaf entry to log */
705         int             to;             /* target leaf index */
706
707         leaf = bp->b_addr;
708         if (!leaf->hdr.stale) {
709                 return;
710         }
711         /*
712          * Compress out the stale entries in place.
713          */
714         for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
715                 if (leaf->ents[from].address ==
716                     cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
717                         continue;
718                 /*
719                  * Only actually copy the entries that are different.
720                  */
721                 if (from > to) {
722                         if (loglow == -1)
723                                 loglow = to;
724                         leaf->ents[to] = leaf->ents[from];
725                 }
726                 to++;
727         }
728         /*
729          * Update and log the header, log the leaf entries.
730          */
731         ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to);
732         be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale)));
733         leaf->hdr.stale = 0;
734         xfs_dir2_leaf_log_header(args->trans, bp);
735         if (loglow != -1)
736                 xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1);
737 }
738
739 /*
740  * Compact the leaf entries, removing stale ones.
741  * Leave one stale entry behind - the one closest to our
742  * insertion index - and the caller will shift that one to our insertion
743  * point later.
744  * Return new insertion index, where the remaining stale entry is,
745  * and leaf logging indices.
746  */
747 void
748 xfs_dir2_leaf_compact_x1(
749         struct xfs_buf  *bp,            /* leaf buffer */
750         int             *indexp,        /* insertion index */
751         int             *lowstalep,     /* out: stale entry before us */
752         int             *highstalep,    /* out: stale entry after us */
753         int             *lowlogp,       /* out: low log index */
754         int             *highlogp)      /* out: high log index */
755 {
756         int             from;           /* source copy index */
757         int             highstale;      /* stale entry at/after index */
758         int             index;          /* insertion index */
759         int             keepstale;      /* source index of kept stale */
760         xfs_dir2_leaf_t *leaf;          /* leaf structure */
761         int             lowstale;       /* stale entry before index */
762         int             newindex=0;     /* new insertion index */
763         int             to;             /* destination copy index */
764
765         leaf = bp->b_addr;
766         ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
767         index = *indexp;
768
769         xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
770
771         /*
772          * Pick the better of lowstale and highstale.
773          */
774         if (lowstale >= 0 &&
775             (highstale == be16_to_cpu(leaf->hdr.count) ||
776              index - lowstale <= highstale - index))
777                 keepstale = lowstale;
778         else
779                 keepstale = highstale;
780         /*
781          * Copy the entries in place, removing all the stale entries
782          * except keepstale.
783          */
784         for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
785                 /*
786                  * Notice the new value of index.
787                  */
788                 if (index == from)
789                         newindex = to;
790                 if (from != keepstale &&
791                     leaf->ents[from].address ==
792                     cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
793                         if (from == to)
794                                 *lowlogp = to;
795                         continue;
796                 }
797                 /*
798                  * Record the new keepstale value for the insertion.
799                  */
800                 if (from == keepstale)
801                         lowstale = highstale = to;
802                 /*
803                  * Copy only the entries that have moved.
804                  */
805                 if (from > to)
806                         leaf->ents[to] = leaf->ents[from];
807                 to++;
808         }
809         ASSERT(from > to);
810         /*
811          * If the insertion point was past the last entry,
812          * set the new insertion point accordingly.
813          */
814         if (index == from)
815                 newindex = to;
816         *indexp = newindex;
817         /*
818          * Adjust the leaf header values.
819          */
820         be16_add_cpu(&leaf->hdr.count, -(from - to));
821         leaf->hdr.stale = cpu_to_be16(1);
822         /*
823          * Remember the low/high stale value only in the "right"
824          * direction.
825          */
826         if (lowstale >= newindex)
827                 lowstale = -1;
828         else
829                 highstale = be16_to_cpu(leaf->hdr.count);
830         *highlogp = be16_to_cpu(leaf->hdr.count) - 1;
831         *lowstalep = lowstale;
832         *highstalep = highstale;
833 }
834
835 struct xfs_dir2_leaf_map_info {
836         xfs_extlen_t    map_blocks;     /* number of fsbs in map */
837         xfs_dablk_t     map_off;        /* last mapped file offset */
838         int             map_size;       /* total entries in *map */
839         int             map_valid;      /* valid entries in *map */
840         int             nmap;           /* mappings to ask xfs_bmapi */
841         xfs_dir2_db_t   curdb;          /* db for current block */
842         int             ra_current;     /* number of read-ahead blks */
843         int             ra_index;       /* *map index for read-ahead */
844         int             ra_offset;      /* map entry offset for ra */
845         int             ra_want;        /* readahead count wanted */
846         struct xfs_bmbt_irec map[];     /* map vector for blocks */
847 };
848
849 STATIC int
850 xfs_dir2_leaf_readbuf(
851         struct xfs_inode        *dp,
852         size_t                  bufsize,
853         struct xfs_dir2_leaf_map_info *mip,
854         xfs_dir2_off_t          *curoff,
855         struct xfs_buf          **bpp)
856 {
857         struct xfs_mount        *mp = dp->i_mount;
858         struct xfs_buf          *bp = *bpp;
859         struct xfs_bmbt_irec    *map = mip->map;
860         int                     error = 0;
861         int                     length;
862         int                     i;
863         int                     j;
864
865         /*
866          * If we have a buffer, we need to release it and
867          * take it out of the mapping.
868          */
869
870         if (bp) {
871                 xfs_trans_brelse(NULL, bp);
872                 bp = NULL;
873                 mip->map_blocks -= mp->m_dirblkfsbs;
874                 /*
875                  * Loop to get rid of the extents for the
876                  * directory block.
877                  */
878                 for (i = mp->m_dirblkfsbs; i > 0; ) {
879                         j = min_t(int, map->br_blockcount, i);
880                         map->br_blockcount -= j;
881                         map->br_startblock += j;
882                         map->br_startoff += j;
883                         /*
884                          * If mapping is done, pitch it from
885                          * the table.
886                          */
887                         if (!map->br_blockcount && --mip->map_valid)
888                                 memmove(&map[0], &map[1],
889                                         sizeof(map[0]) * mip->map_valid);
890                         i -= j;
891                 }
892         }
893
894         /*
895          * Recalculate the readahead blocks wanted.
896          */
897         mip->ra_want = howmany(bufsize + mp->m_dirblksize,
898                                mp->m_sb.sb_blocksize) - 1;
899         ASSERT(mip->ra_want >= 0);
900
901         /*
902          * If we don't have as many as we want, and we haven't
903          * run out of data blocks, get some more mappings.
904          */
905         if (1 + mip->ra_want > mip->map_blocks &&
906             mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
907                 /*
908                  * Get more bmaps, fill in after the ones
909                  * we already have in the table.
910                  */
911                 mip->nmap = mip->map_size - mip->map_valid;
912                 error = xfs_bmapi_read(dp, mip->map_off,
913                                 xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
914                                                                 mip->map_off,
915                                 &map[mip->map_valid], &mip->nmap, 0);
916
917                 /*
918                  * Don't know if we should ignore this or try to return an
919                  * error.  The trouble with returning errors is that readdir
920                  * will just stop without actually passing the error through.
921                  */
922                 if (error)
923                         goto out;       /* XXX */
924
925                 /*
926                  * If we got all the mappings we asked for, set the final map
927                  * offset based on the last bmap value received.  Otherwise,
928                  * we've reached the end.
929                  */
930                 if (mip->nmap == mip->map_size - mip->map_valid) {
931                         i = mip->map_valid + mip->nmap - 1;
932                         mip->map_off = map[i].br_startoff + map[i].br_blockcount;
933                 } else
934                         mip->map_off = xfs_dir2_byte_to_da(mp,
935                                                         XFS_DIR2_LEAF_OFFSET);
936
937                 /*
938                  * Look for holes in the mapping, and eliminate them.  Count up
939                  * the valid blocks.
940                  */
941                 for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
942                         if (map[i].br_startblock == HOLESTARTBLOCK) {
943                                 mip->nmap--;
944                                 length = mip->map_valid + mip->nmap - i;
945                                 if (length)
946                                         memmove(&map[i], &map[i + 1],
947                                                 sizeof(map[i]) * length);
948                         } else {
949                                 mip->map_blocks += map[i].br_blockcount;
950                                 i++;
951                         }
952                 }
953                 mip->map_valid += mip->nmap;
954         }
955
956         /*
957          * No valid mappings, so no more data blocks.
958          */
959         if (!mip->map_valid) {
960                 *curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
961                 goto out;
962         }
963
964         /*
965          * Read the directory block starting at the first mapping.
966          */
967         mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
968         error = xfs_dir2_data_read(NULL, dp, map->br_startoff,
969                         map->br_blockcount >= mp->m_dirblkfsbs ?
970                             XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
971
972         /*
973          * Should just skip over the data block instead of giving up.
974          */
975         if (error)
976                 goto out;       /* XXX */
977
978         /*
979          * Adjust the current amount of read-ahead: we just read a block that
980          * was previously ra.
981          */
982         if (mip->ra_current)
983                 mip->ra_current -= mp->m_dirblkfsbs;
984
985         /*
986          * Do we need more readahead?
987          */
988         for (mip->ra_index = mip->ra_offset = i = 0;
989              mip->ra_want > mip->ra_current && i < mip->map_blocks;
990              i += mp->m_dirblkfsbs) {
991                 ASSERT(mip->ra_index < mip->map_valid);
992                 /*
993                  * Read-ahead a contiguous directory block.
994                  */
995                 if (i > mip->ra_current &&
996                     map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
997                         xfs_dir2_data_readahead(NULL, dp,
998                                 map[mip->ra_index].br_startoff + mip->ra_offset,
999                                 XFS_FSB_TO_DADDR(mp,
1000                                         map[mip->ra_index].br_startblock +
1001                                                         mip->ra_offset));
1002                         mip->ra_current = i;
1003                 }
1004
1005                 /*
1006                  * Read-ahead a non-contiguous directory block.  This doesn't
1007                  * use our mapping, but this is a very rare case.
1008                  */
1009                 else if (i > mip->ra_current) {
1010                         xfs_dir2_data_readahead(NULL, dp,
1011                                         map[mip->ra_index].br_startoff +
1012                                                         mip->ra_offset, -1);
1013                         mip->ra_current = i;
1014                 }
1015
1016                 /*
1017                  * Advance offset through the mapping table.
1018                  */
1019                 for (j = 0; j < mp->m_dirblkfsbs; j++) {
1020                         /*
1021                          * The rest of this extent but not more than a dir
1022                          * block.
1023                          */
1024                         length = min_t(int, mp->m_dirblkfsbs,
1025                                         map[mip->ra_index].br_blockcount -
1026                                                         mip->ra_offset);
1027                         j += length;
1028                         mip->ra_offset += length;
1029
1030                         /*
1031                          * Advance to the next mapping if this one is used up.
1032                          */
1033                         if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
1034                                 mip->ra_offset = 0;
1035                                 mip->ra_index++;
1036                         }
1037                 }
1038         }
1039
1040 out:
1041         *bpp = bp;
1042         return error;
1043 }
1044
1045 /*
1046  * Getdents (readdir) for leaf and node directories.
1047  * This reads the data blocks only, so is the same for both forms.
1048  */
1049 int                                             /* error */
1050 xfs_dir2_leaf_getdents(
1051         xfs_inode_t             *dp,            /* incore directory inode */
1052         void                    *dirent,
1053         size_t                  bufsize,
1054         xfs_off_t               *offset,
1055         filldir_t               filldir)
1056 {
1057         struct xfs_buf          *bp = NULL;     /* data block buffer */
1058         xfs_dir2_data_hdr_t     *hdr;           /* data block header */
1059         xfs_dir2_data_entry_t   *dep;           /* data entry */
1060         xfs_dir2_data_unused_t  *dup;           /* unused entry */
1061         int                     error = 0;      /* error return value */
1062         int                     length;         /* temporary length value */
1063         xfs_mount_t             *mp;            /* filesystem mount point */
1064         int                     byteoff;        /* offset in current block */
1065         xfs_dir2_off_t          curoff;         /* current overall offset */
1066         xfs_dir2_off_t          newoff;         /* new curoff after new blk */
1067         char                    *ptr = NULL;    /* pointer to current data */
1068         struct xfs_dir2_leaf_map_info *map_info;
1069
1070         /*
1071          * If the offset is at or past the largest allowed value,
1072          * give up right away.
1073          */
1074         if (*offset >= XFS_DIR2_MAX_DATAPTR)
1075                 return 0;
1076
1077         mp = dp->i_mount;
1078
1079         /*
1080          * Set up to bmap a number of blocks based on the caller's
1081          * buffer size, the directory block size, and the filesystem
1082          * block size.
1083          */
1084         length = howmany(bufsize + mp->m_dirblksize,
1085                                      mp->m_sb.sb_blocksize);
1086         map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
1087                                 (length * sizeof(struct xfs_bmbt_irec)),
1088                                KM_SLEEP);
1089         map_info->map_size = length;
1090
1091         /*
1092          * Inside the loop we keep the main offset value as a byte offset
1093          * in the directory file.
1094          */
1095         curoff = xfs_dir2_dataptr_to_byte(mp, *offset);
1096
1097         /*
1098          * Force this conversion through db so we truncate the offset
1099          * down to get the start of the data block.
1100          */
1101         map_info->map_off = xfs_dir2_db_to_da(mp,
1102                                               xfs_dir2_byte_to_db(mp, curoff));
1103
1104         /*
1105          * Loop over directory entries until we reach the end offset.
1106          * Get more blocks and readahead as necessary.
1107          */
1108         while (curoff < XFS_DIR2_LEAF_OFFSET) {
1109                 /*
1110                  * If we have no buffer, or we're off the end of the
1111                  * current buffer, need to get another one.
1112                  */
1113                 if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
1114
1115                         error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
1116                                                       &curoff, &bp);
1117                         if (error || !map_info->map_valid)
1118                                 break;
1119
1120                         /*
1121                          * Having done a read, we need to set a new offset.
1122                          */
1123                         newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
1124                         /*
1125                          * Start of the current block.
1126                          */
1127                         if (curoff < newoff)
1128                                 curoff = newoff;
1129                         /*
1130                          * Make sure we're in the right block.
1131                          */
1132                         else if (curoff > newoff)
1133                                 ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1134                                        map_info->curdb);
1135                         hdr = bp->b_addr;
1136                         xfs_dir2_data_check(dp, bp);
1137                         /*
1138                          * Find our position in the block.
1139                          */
1140                         ptr = (char *)(hdr + 1);
1141                         byteoff = xfs_dir2_byte_to_off(mp, curoff);
1142                         /*
1143                          * Skip past the header.
1144                          */
1145                         if (byteoff == 0)
1146                                 curoff += (uint)sizeof(*hdr);
1147                         /*
1148                          * Skip past entries until we reach our offset.
1149                          */
1150                         else {
1151                                 while ((char *)ptr - (char *)hdr < byteoff) {
1152                                         dup = (xfs_dir2_data_unused_t *)ptr;
1153
1154                                         if (be16_to_cpu(dup->freetag)
1155                                                   == XFS_DIR2_DATA_FREE_TAG) {
1156
1157                                                 length = be16_to_cpu(dup->length);
1158                                                 ptr += length;
1159                                                 continue;
1160                                         }
1161                                         dep = (xfs_dir2_data_entry_t *)ptr;
1162                                         length =
1163                                            xfs_dir2_data_entsize(dep->namelen);
1164                                         ptr += length;
1165                                 }
1166                                 /*
1167                                  * Now set our real offset.
1168                                  */
1169                                 curoff =
1170                                         xfs_dir2_db_off_to_byte(mp,
1171                                             xfs_dir2_byte_to_db(mp, curoff),
1172                                             (char *)ptr - (char *)hdr);
1173                                 if (ptr >= (char *)hdr + mp->m_dirblksize) {
1174                                         continue;
1175                                 }
1176                         }
1177                 }
1178                 /*
1179                  * We have a pointer to an entry.
1180                  * Is it a live one?
1181                  */
1182                 dup = (xfs_dir2_data_unused_t *)ptr;
1183                 /*
1184                  * No, it's unused, skip over it.
1185                  */
1186                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1187                         length = be16_to_cpu(dup->length);
1188                         ptr += length;
1189                         curoff += length;
1190                         continue;
1191                 }
1192
1193                 dep = (xfs_dir2_data_entry_t *)ptr;
1194                 length = xfs_dir2_data_entsize(dep->namelen);
1195
1196                 if (filldir(dirent, (char *)dep->name, dep->namelen,
1197                             xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
1198                             be64_to_cpu(dep->inumber), DT_UNKNOWN))
1199                         break;
1200
1201                 /*
1202                  * Advance to next entry in the block.
1203                  */
1204                 ptr += length;
1205                 curoff += length;
1206                 /* bufsize may have just been a guess; don't go negative */
1207                 bufsize = bufsize > length ? bufsize - length : 0;
1208         }
1209
1210         /*
1211          * All done.  Set output offset value to current offset.
1212          */
1213         if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
1214                 *offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
1215         else
1216                 *offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
1217         kmem_free(map_info);
1218         if (bp)
1219                 xfs_trans_brelse(NULL, bp);
1220         return error;
1221 }
1222
1223 /*
1224  * Initialize a new leaf block, leaf1 or leafn magic accepted.
1225  */
1226 int
1227 xfs_dir2_leaf_init(
1228         xfs_da_args_t           *args,          /* operation arguments */
1229         xfs_dir2_db_t           bno,            /* directory block number */
1230         struct xfs_buf          **bpp,          /* out: leaf buffer */
1231         int                     magic)          /* magic number for block */
1232 {
1233         struct xfs_buf          *bp;            /* leaf buffer */
1234         xfs_inode_t             *dp;            /* incore directory inode */
1235         int                     error;          /* error return code */
1236         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1237         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1238         xfs_mount_t             *mp;            /* filesystem mount point */
1239         xfs_trans_t             *tp;            /* transaction pointer */
1240
1241         dp = args->dp;
1242         ASSERT(dp != NULL);
1243         tp = args->trans;
1244         mp = dp->i_mount;
1245         ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
1246                bno < XFS_DIR2_FREE_FIRSTDB(mp));
1247         /*
1248          * Get the buffer for the block.
1249          */
1250         error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1251                                XFS_DATA_FORK);
1252         if (error)
1253                 return error;
1254
1255         /*
1256          * Initialize the header.
1257          */
1258         leaf = bp->b_addr;
1259         leaf->hdr.info.magic = cpu_to_be16(magic);
1260         leaf->hdr.info.forw = 0;
1261         leaf->hdr.info.back = 0;
1262         leaf->hdr.count = 0;
1263         leaf->hdr.stale = 0;
1264         xfs_dir2_leaf_log_header(tp, bp);
1265         /*
1266          * If it's a leaf-format directory initialize the tail.
1267          * In this case our caller has the real bests table to copy into
1268          * the block.
1269          */
1270         if (magic == XFS_DIR2_LEAF1_MAGIC) {
1271                 bp->b_ops = &xfs_dir2_leaf1_buf_ops;
1272                 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1273                 ltp->bestcount = 0;
1274                 xfs_dir2_leaf_log_tail(tp, bp);
1275         } else
1276                 bp->b_ops = &xfs_dir2_leafn_buf_ops;
1277         *bpp = bp;
1278         return 0;
1279 }
1280
1281 /*
1282  * Log the bests entries indicated from a leaf1 block.
1283  */
1284 static void
1285 xfs_dir2_leaf_log_bests(
1286         xfs_trans_t             *tp,            /* transaction pointer */
1287         struct xfs_buf          *bp,            /* leaf buffer */
1288         int                     first,          /* first entry to log */
1289         int                     last)           /* last entry to log */
1290 {
1291         __be16                  *firstb;        /* pointer to first entry */
1292         __be16                  *lastb;         /* pointer to last entry */
1293         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1294         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1295
1296         leaf = bp->b_addr;
1297         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1298         ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1299         firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1300         lastb = xfs_dir2_leaf_bests_p(ltp) + last;
1301         xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
1302                 (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
1303 }
1304
1305 /*
1306  * Log the leaf entries indicated from a leaf1 or leafn block.
1307  */
1308 void
1309 xfs_dir2_leaf_log_ents(
1310         xfs_trans_t             *tp,            /* transaction pointer */
1311         struct xfs_buf          *bp,            /* leaf buffer */
1312         int                     first,          /* first entry to log */
1313         int                     last)           /* last entry to log */
1314 {
1315         xfs_dir2_leaf_entry_t   *firstlep;      /* pointer to first entry */
1316         xfs_dir2_leaf_entry_t   *lastlep;       /* pointer to last entry */
1317         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1318
1319         leaf = bp->b_addr;
1320         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1321                leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1322         firstlep = &leaf->ents[first];
1323         lastlep = &leaf->ents[last];
1324         xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
1325                 (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
1326 }
1327
1328 /*
1329  * Log the header of the leaf1 or leafn block.
1330  */
1331 void
1332 xfs_dir2_leaf_log_header(
1333         struct xfs_trans        *tp,
1334         struct xfs_buf          *bp)
1335 {
1336         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1337
1338         leaf = bp->b_addr;
1339         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1340                leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1341         xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1342                 (uint)(sizeof(leaf->hdr) - 1));
1343 }
1344
1345 /*
1346  * Log the tail of the leaf1 block.
1347  */
1348 STATIC void
1349 xfs_dir2_leaf_log_tail(
1350         struct xfs_trans        *tp,
1351         struct xfs_buf          *bp)
1352 {
1353         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1354         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1355         xfs_mount_t             *mp;            /* filesystem mount point */
1356
1357         mp = tp->t_mountp;
1358         leaf = bp->b_addr;
1359         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1360         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1361         xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1362                 (uint)(mp->m_dirblksize - 1));
1363 }
1364
1365 /*
1366  * Look up the entry referred to by args in the leaf format directory.
1367  * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
1368  * is also used by the node-format code.
1369  */
1370 int
1371 xfs_dir2_leaf_lookup(
1372         xfs_da_args_t           *args)          /* operation arguments */
1373 {
1374         struct xfs_buf          *dbp;           /* data block buffer */
1375         xfs_dir2_data_entry_t   *dep;           /* data block entry */
1376         xfs_inode_t             *dp;            /* incore directory inode */
1377         int                     error;          /* error return code */
1378         int                     index;          /* found entry index */
1379         struct xfs_buf          *lbp;           /* leaf buffer */
1380         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1381         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1382         xfs_trans_t             *tp;            /* transaction pointer */
1383
1384         trace_xfs_dir2_leaf_lookup(args);
1385
1386         /*
1387          * Look up name in the leaf block, returning both buffers and index.
1388          */
1389         if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1390                 return error;
1391         }
1392         tp = args->trans;
1393         dp = args->dp;
1394         xfs_dir2_leaf_check(dp, lbp);
1395         leaf = lbp->b_addr;
1396         /*
1397          * Get to the leaf entry and contained data entry address.
1398          */
1399         lep = &leaf->ents[index];
1400         /*
1401          * Point to the data entry.
1402          */
1403         dep = (xfs_dir2_data_entry_t *)
1404               ((char *)dbp->b_addr +
1405                xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1406         /*
1407          * Return the found inode number & CI name if appropriate
1408          */
1409         args->inumber = be64_to_cpu(dep->inumber);
1410         error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1411         xfs_trans_brelse(tp, dbp);
1412         xfs_trans_brelse(tp, lbp);
1413         return XFS_ERROR(error);
1414 }
1415
1416 /*
1417  * Look up name/hash in the leaf block.
1418  * Fill in indexp with the found index, and dbpp with the data buffer.
1419  * If not found dbpp will be NULL, and ENOENT comes back.
1420  * lbpp will always be filled in with the leaf buffer unless there's an error.
1421  */
1422 static int                                      /* error */
1423 xfs_dir2_leaf_lookup_int(
1424         xfs_da_args_t           *args,          /* operation arguments */
1425         struct xfs_buf          **lbpp,         /* out: leaf buffer */
1426         int                     *indexp,        /* out: index in leaf block */
1427         struct xfs_buf          **dbpp)         /* out: data buffer */
1428 {
1429         xfs_dir2_db_t           curdb = -1;     /* current data block number */
1430         struct xfs_buf          *dbp = NULL;    /* data buffer */
1431         xfs_dir2_data_entry_t   *dep;           /* data entry */
1432         xfs_inode_t             *dp;            /* incore directory inode */
1433         int                     error;          /* error return code */
1434         int                     index;          /* index in leaf block */
1435         struct xfs_buf          *lbp;           /* leaf buffer */
1436         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1437         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1438         xfs_mount_t             *mp;            /* filesystem mount point */
1439         xfs_dir2_db_t           newdb;          /* new data block number */
1440         xfs_trans_t             *tp;            /* transaction pointer */
1441         xfs_dir2_db_t           cidb = -1;      /* case match data block no. */
1442         enum xfs_dacmp          cmp;            /* name compare result */
1443
1444         dp = args->dp;
1445         tp = args->trans;
1446         mp = dp->i_mount;
1447
1448         error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
1449         if (error)
1450                 return error;
1451
1452         *lbpp = lbp;
1453         leaf = lbp->b_addr;
1454         xfs_dir2_leaf_check(dp, lbp);
1455         /*
1456          * Look for the first leaf entry with our hash value.
1457          */
1458         index = xfs_dir2_leaf_search_hash(args, lbp);
1459         /*
1460          * Loop over all the entries with the right hash value
1461          * looking to match the name.
1462          */
1463         for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
1464                                 be32_to_cpu(lep->hashval) == args->hashval;
1465                                 lep++, index++) {
1466                 /*
1467                  * Skip over stale leaf entries.
1468                  */
1469                 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
1470                         continue;
1471                 /*
1472                  * Get the new data block number.
1473                  */
1474                 newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1475                 /*
1476                  * If it's not the same as the old data block number,
1477                  * need to pitch the old one and read the new one.
1478                  */
1479                 if (newdb != curdb) {
1480                         if (dbp)
1481                                 xfs_trans_brelse(tp, dbp);
1482                         error = xfs_dir2_data_read(tp, dp,
1483                                                    xfs_dir2_db_to_da(mp, newdb),
1484                                                    -1, &dbp);
1485                         if (error) {
1486                                 xfs_trans_brelse(tp, lbp);
1487                                 return error;
1488                         }
1489                         curdb = newdb;
1490                 }
1491                 /*
1492                  * Point to the data entry.
1493                  */
1494                 dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
1495                         xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1496                 /*
1497                  * Compare name and if it's an exact match, return the index
1498                  * and buffer. If it's the first case-insensitive match, store
1499                  * the index and buffer and continue looking for an exact match.
1500                  */
1501                 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
1502                 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
1503                         args->cmpresult = cmp;
1504                         *indexp = index;
1505                         /* case exact match: return the current buffer. */
1506                         if (cmp == XFS_CMP_EXACT) {
1507                                 *dbpp = dbp;
1508                                 return 0;
1509                         }
1510                         cidb = curdb;
1511                 }
1512         }
1513         ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1514         /*
1515          * Here, we can only be doing a lookup (not a rename or remove).
1516          * If a case-insensitive match was found earlier, re-read the
1517          * appropriate data block if required and return it.
1518          */
1519         if (args->cmpresult == XFS_CMP_CASE) {
1520                 ASSERT(cidb != -1);
1521                 if (cidb != curdb) {
1522                         xfs_trans_brelse(tp, dbp);
1523                         error = xfs_dir2_data_read(tp, dp,
1524                                                    xfs_dir2_db_to_da(mp, cidb),
1525                                                    -1, &dbp);
1526                         if (error) {
1527                                 xfs_trans_brelse(tp, lbp);
1528                                 return error;
1529                         }
1530                 }
1531                 *dbpp = dbp;
1532                 return 0;
1533         }
1534         /*
1535          * No match found, return ENOENT.
1536          */
1537         ASSERT(cidb == -1);
1538         if (dbp)
1539                 xfs_trans_brelse(tp, dbp);
1540         xfs_trans_brelse(tp, lbp);
1541         return XFS_ERROR(ENOENT);
1542 }
1543
1544 /*
1545  * Remove an entry from a leaf format directory.
1546  */
1547 int                                             /* error */
1548 xfs_dir2_leaf_removename(
1549         xfs_da_args_t           *args)          /* operation arguments */
1550 {
1551         __be16                  *bestsp;        /* leaf block best freespace */
1552         xfs_dir2_data_hdr_t     *hdr;           /* data block header */
1553         xfs_dir2_db_t           db;             /* data block number */
1554         struct xfs_buf          *dbp;           /* data block buffer */
1555         xfs_dir2_data_entry_t   *dep;           /* data entry structure */
1556         xfs_inode_t             *dp;            /* incore directory inode */
1557         int                     error;          /* error return code */
1558         xfs_dir2_db_t           i;              /* temporary data block # */
1559         int                     index;          /* index into leaf entries */
1560         struct xfs_buf          *lbp;           /* leaf buffer */
1561         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1562         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1563         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1564         xfs_mount_t             *mp;            /* filesystem mount point */
1565         int                     needlog;        /* need to log data header */
1566         int                     needscan;       /* need to rescan data frees */
1567         xfs_dir2_data_off_t     oldbest;        /* old value of best free */
1568         xfs_trans_t             *tp;            /* transaction pointer */
1569
1570         trace_xfs_dir2_leaf_removename(args);
1571
1572         /*
1573          * Lookup the leaf entry, get the leaf and data blocks read in.
1574          */
1575         if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1576                 return error;
1577         }
1578         dp = args->dp;
1579         tp = args->trans;
1580         mp = dp->i_mount;
1581         leaf = lbp->b_addr;
1582         hdr = dbp->b_addr;
1583         xfs_dir2_data_check(dp, dbp);
1584         /*
1585          * Point to the leaf entry, use that to point to the data entry.
1586          */
1587         lep = &leaf->ents[index];
1588         db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1589         dep = (xfs_dir2_data_entry_t *)
1590               ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1591         needscan = needlog = 0;
1592         oldbest = be16_to_cpu(hdr->bestfree[0].length);
1593         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1594         bestsp = xfs_dir2_leaf_bests_p(ltp);
1595         ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
1596         /*
1597          * Mark the former data entry unused.
1598          */
1599         xfs_dir2_data_make_free(tp, dbp,
1600                 (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
1601                 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
1602         /*
1603          * We just mark the leaf entry stale by putting a null in it.
1604          */
1605         be16_add_cpu(&leaf->hdr.stale, 1);
1606         xfs_dir2_leaf_log_header(tp, lbp);
1607         lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
1608         xfs_dir2_leaf_log_ents(tp, lbp, index, index);
1609         /*
1610          * Scan the freespace in the data block again if necessary,
1611          * log the data block header if necessary.
1612          */
1613         if (needscan)
1614                 xfs_dir2_data_freescan(mp, hdr, &needlog);
1615         if (needlog)
1616                 xfs_dir2_data_log_header(tp, dbp);
1617         /*
1618          * If the longest freespace in the data block has changed,
1619          * put the new value in the bests table and log that.
1620          */
1621         if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) {
1622                 bestsp[db] = hdr->bestfree[0].length;
1623                 xfs_dir2_leaf_log_bests(tp, lbp, db, db);
1624         }
1625         xfs_dir2_data_check(dp, dbp);
1626         /*
1627          * If the data block is now empty then get rid of the data block.
1628          */
1629         if (be16_to_cpu(hdr->bestfree[0].length) ==
1630             mp->m_dirblksize - (uint)sizeof(*hdr)) {
1631                 ASSERT(db != mp->m_dirdatablk);
1632                 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1633                         /*
1634                          * Nope, can't get rid of it because it caused
1635                          * allocation of a bmap btree block to do so.
1636                          * Just go on, returning success, leaving the
1637                          * empty block in place.
1638                          */
1639                         if (error == ENOSPC && args->total == 0)
1640                                 error = 0;
1641                         xfs_dir2_leaf_check(dp, lbp);
1642                         return error;
1643                 }
1644                 dbp = NULL;
1645                 /*
1646                  * If this is the last data block then compact the
1647                  * bests table by getting rid of entries.
1648                  */
1649                 if (db == be32_to_cpu(ltp->bestcount) - 1) {
1650                         /*
1651                          * Look for the last active entry (i).
1652                          */
1653                         for (i = db - 1; i > 0; i--) {
1654                                 if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
1655                                         break;
1656                         }
1657                         /*
1658                          * Copy the table down so inactive entries at the
1659                          * end are removed.
1660                          */
1661                         memmove(&bestsp[db - i], bestsp,
1662                                 (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
1663                         be32_add_cpu(&ltp->bestcount, -(db - i));
1664                         xfs_dir2_leaf_log_tail(tp, lbp);
1665                         xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1666                 } else
1667                         bestsp[db] = cpu_to_be16(NULLDATAOFF);
1668         }
1669         /*
1670          * If the data block was not the first one, drop it.
1671          */
1672         else if (db != mp->m_dirdatablk)
1673                 dbp = NULL;
1674
1675         xfs_dir2_leaf_check(dp, lbp);
1676         /*
1677          * See if we can convert to block form.
1678          */
1679         return xfs_dir2_leaf_to_block(args, lbp, dbp);
1680 }
1681
1682 /*
1683  * Replace the inode number in a leaf format directory entry.
1684  */
1685 int                                             /* error */
1686 xfs_dir2_leaf_replace(
1687         xfs_da_args_t           *args)          /* operation arguments */
1688 {
1689         struct xfs_buf          *dbp;           /* data block buffer */
1690         xfs_dir2_data_entry_t   *dep;           /* data block entry */
1691         xfs_inode_t             *dp;            /* incore directory inode */
1692         int                     error;          /* error return code */
1693         int                     index;          /* index of leaf entry */
1694         struct xfs_buf          *lbp;           /* leaf buffer */
1695         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1696         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1697         xfs_trans_t             *tp;            /* transaction pointer */
1698
1699         trace_xfs_dir2_leaf_replace(args);
1700
1701         /*
1702          * Look up the entry.
1703          */
1704         if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1705                 return error;
1706         }
1707         dp = args->dp;
1708         leaf = lbp->b_addr;
1709         /*
1710          * Point to the leaf entry, get data address from it.
1711          */
1712         lep = &leaf->ents[index];
1713         /*
1714          * Point to the data entry.
1715          */
1716         dep = (xfs_dir2_data_entry_t *)
1717               ((char *)dbp->b_addr +
1718                xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1719         ASSERT(args->inumber != be64_to_cpu(dep->inumber));
1720         /*
1721          * Put the new inode number in, log it.
1722          */
1723         dep->inumber = cpu_to_be64(args->inumber);
1724         tp = args->trans;
1725         xfs_dir2_data_log_entry(tp, dbp, dep);
1726         xfs_dir2_leaf_check(dp, lbp);
1727         xfs_trans_brelse(tp, lbp);
1728         return 0;
1729 }
1730
1731 /*
1732  * Return index in the leaf block (lbp) which is either the first
1733  * one with this hash value, or if there are none, the insert point
1734  * for that hash value.
1735  */
1736 int                                             /* index value */
1737 xfs_dir2_leaf_search_hash(
1738         xfs_da_args_t           *args,          /* operation arguments */
1739         struct xfs_buf          *lbp)           /* leaf buffer */
1740 {
1741         xfs_dahash_t            hash=0;         /* hash from this entry */
1742         xfs_dahash_t            hashwant;       /* hash value looking for */
1743         int                     high;           /* high leaf index */
1744         int                     low;            /* low leaf index */
1745         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1746         xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
1747         int                     mid=0;          /* current leaf index */
1748
1749         leaf = lbp->b_addr;
1750 #ifndef __KERNEL__
1751         if (!leaf->hdr.count)
1752                 return 0;
1753 #endif
1754         /*
1755          * Note, the table cannot be empty, so we have to go through the loop.
1756          * Binary search the leaf entries looking for our hash value.
1757          */
1758         for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1,
1759                 hashwant = args->hashval;
1760              low <= high; ) {
1761                 mid = (low + high) >> 1;
1762                 if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
1763                         break;
1764                 if (hash < hashwant)
1765                         low = mid + 1;
1766                 else
1767                         high = mid - 1;
1768         }
1769         /*
1770          * Found one, back up through all the equal hash values.
1771          */
1772         if (hash == hashwant) {
1773                 while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
1774                         mid--;
1775                 }
1776         }
1777         /*
1778          * Need to point to an entry higher than ours.
1779          */
1780         else if (hash < hashwant)
1781                 mid++;
1782         return mid;
1783 }
1784
1785 /*
1786  * Trim off a trailing data block.  We know it's empty since the leaf
1787  * freespace table says so.
1788  */
1789 int                                             /* error */
1790 xfs_dir2_leaf_trim_data(
1791         xfs_da_args_t           *args,          /* operation arguments */
1792         struct xfs_buf          *lbp,           /* leaf buffer */
1793         xfs_dir2_db_t           db)             /* data block number */
1794 {
1795         __be16                  *bestsp;        /* leaf bests table */
1796         struct xfs_buf          *dbp;           /* data block buffer */
1797         xfs_inode_t             *dp;            /* incore directory inode */
1798         int                     error;          /* error return value */
1799         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1800         xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
1801         xfs_mount_t             *mp;            /* filesystem mount point */
1802         xfs_trans_t             *tp;            /* transaction pointer */
1803
1804         dp = args->dp;
1805         mp = dp->i_mount;
1806         tp = args->trans;
1807         /*
1808          * Read the offending data block.  We need its buffer.
1809          */
1810         error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp);
1811         if (error)
1812                 return error;
1813
1814         leaf = lbp->b_addr;
1815         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1816
1817 #ifdef DEBUG
1818 {
1819         struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
1820
1821         ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
1822         ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
1823                mp->m_dirblksize - (uint)sizeof(*hdr));
1824         ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1825 }
1826 #endif
1827
1828         /*
1829          * Get rid of the data block.
1830          */
1831         if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1832                 ASSERT(error != ENOSPC);
1833                 xfs_trans_brelse(tp, dbp);
1834                 return error;
1835         }
1836         /*
1837          * Eliminate the last bests entry from the table.
1838          */
1839         bestsp = xfs_dir2_leaf_bests_p(ltp);
1840         be32_add_cpu(&ltp->bestcount, -1);
1841         memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1842         xfs_dir2_leaf_log_tail(tp, lbp);
1843         xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1844         return 0;
1845 }
1846
1847 static inline size_t
1848 xfs_dir2_leaf_size(
1849         struct xfs_dir2_leaf_hdr        *hdr,
1850         int                             counts)
1851 {
1852         int                     entries;
1853
1854         entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
1855         return sizeof(xfs_dir2_leaf_hdr_t) +
1856             entries * sizeof(xfs_dir2_leaf_entry_t) +
1857             counts * sizeof(xfs_dir2_data_off_t) +
1858             sizeof(xfs_dir2_leaf_tail_t);
1859 }
1860
1861 /*
1862  * Convert node form directory to leaf form directory.
1863  * The root of the node form dir needs to already be a LEAFN block.
1864  * Just return if we can't do anything.
1865  */
1866 int                                             /* error */
1867 xfs_dir2_node_to_leaf(
1868         xfs_da_state_t          *state)         /* directory operation state */
1869 {
1870         xfs_da_args_t           *args;          /* operation arguments */
1871         xfs_inode_t             *dp;            /* incore directory inode */
1872         int                     error;          /* error return code */
1873         struct xfs_buf          *fbp;           /* buffer for freespace block */
1874         xfs_fileoff_t           fo;             /* freespace file offset */
1875         xfs_dir2_free_t         *free;          /* freespace structure */
1876         struct xfs_buf          *lbp;           /* buffer for leaf block */
1877         xfs_dir2_leaf_tail_t    *ltp;           /* tail of leaf structure */
1878         xfs_dir2_leaf_t         *leaf;          /* leaf structure */
1879         xfs_mount_t             *mp;            /* filesystem mount point */
1880         int                     rval;           /* successful free trim? */
1881         xfs_trans_t             *tp;            /* transaction pointer */
1882
1883         /*
1884          * There's more than a leaf level in the btree, so there must
1885          * be multiple leafn blocks.  Give up.
1886          */
1887         if (state->path.active > 1)
1888                 return 0;
1889         args = state->args;
1890
1891         trace_xfs_dir2_node_to_leaf(args);
1892
1893         mp = state->mp;
1894         dp = args->dp;
1895         tp = args->trans;
1896         /*
1897          * Get the last offset in the file.
1898          */
1899         if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
1900                 return error;
1901         }
1902         fo -= mp->m_dirblkfsbs;
1903         /*
1904          * If there are freespace blocks other than the first one,
1905          * take this opportunity to remove trailing empty freespace blocks
1906          * that may have been left behind during no-space-reservation
1907          * operations.
1908          */
1909         while (fo > mp->m_dirfreeblk) {
1910                 if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
1911                         return error;
1912                 }
1913                 if (rval)
1914                         fo -= mp->m_dirblkfsbs;
1915                 else
1916                         return 0;
1917         }
1918         /*
1919          * Now find the block just before the freespace block.
1920          */
1921         if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
1922                 return error;
1923         }
1924         /*
1925          * If it's not the single leaf block, give up.
1926          */
1927         if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
1928                 return 0;
1929         lbp = state->path.blk[0].bp;
1930         leaf = lbp->b_addr;
1931         ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1932         /*
1933          * Read the freespace block.
1934          */
1935         error = xfs_dir2_free_read(tp, dp,  mp->m_dirfreeblk, &fbp);
1936         if (error)
1937                 return error;
1938         free = fbp->b_addr;
1939         ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1940         ASSERT(!free->hdr.firstdb);
1941
1942         /*
1943          * Now see if the leafn and free data will fit in a leaf1.
1944          * If not, release the buffer and give up.
1945          */
1946         if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
1947                         mp->m_dirblksize) {
1948                 xfs_trans_brelse(tp, fbp);
1949                 return 0;
1950         }
1951
1952         /*
1953          * If the leaf has any stale entries in it, compress them out.
1954          * The compact routine will log the header.
1955          */
1956         if (be16_to_cpu(leaf->hdr.stale))
1957                 xfs_dir2_leaf_compact(args, lbp);
1958         else
1959                 xfs_dir2_leaf_log_header(tp, lbp);
1960
1961         lbp->b_ops = &xfs_dir2_leaf1_buf_ops;
1962         leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC);
1963
1964         /*
1965          * Set up the leaf tail from the freespace block.
1966          */
1967         ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1968         ltp->bestcount = free->hdr.nvalid;
1969         /*
1970          * Set up the leaf bests table.
1971          */
1972         memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
1973                 be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t));
1974         xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1975         xfs_dir2_leaf_log_tail(tp, lbp);
1976         xfs_dir2_leaf_check(dp, lbp);
1977         /*
1978          * Get rid of the freespace block.
1979          */
1980         error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
1981         if (error) {
1982                 /*
1983                  * This can't fail here because it can only happen when
1984                  * punching out the middle of an extent, and this is an
1985                  * isolated block.
1986                  */
1987                 ASSERT(error != ENOSPC);
1988                 return error;
1989         }
1990         fbp = NULL;
1991         /*
1992          * Now see if we can convert the single-leaf directory
1993          * down to a block form directory.
1994          * This routine always kills the dabuf for the leaf, so
1995          * eliminate it from the path.
1996          */
1997         error = xfs_dir2_leaf_to_block(args, lbp, NULL);
1998         state->path.blk[0].bp = NULL;
1999         return error;
2000 }