]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/xfs/libxfs/xfs_bmap.c
Merge branch 'xfs-dax-updates' into for-next
[karo-tx-linux.git] / fs / xfs / libxfs / xfs_bmap.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_sb.h"
26 #include "xfs_mount.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_dir2.h"
30 #include "xfs_inode.h"
31 #include "xfs_btree.h"
32 #include "xfs_trans.h"
33 #include "xfs_inode_item.h"
34 #include "xfs_extfree_item.h"
35 #include "xfs_alloc.h"
36 #include "xfs_bmap.h"
37 #include "xfs_bmap_util.h"
38 #include "xfs_bmap_btree.h"
39 #include "xfs_rtalloc.h"
40 #include "xfs_error.h"
41 #include "xfs_quota.h"
42 #include "xfs_trans_space.h"
43 #include "xfs_buf_item.h"
44 #include "xfs_trace.h"
45 #include "xfs_symlink.h"
46 #include "xfs_attr_leaf.h"
47 #include "xfs_filestream.h"
48
49
/*
 * Zone backing the free-list items: xfs_bmap_add_free() allocates each
 * new item from it and xfs_bmap_del_free() returns items to it.
 */
kmem_zone_t             *xfs_bmap_free_item_zone;
51
52 /*
53  * Miscellaneous helper functions
54  */
55
56 /*
57  * Compute and fill in the value of the maximum depth of a bmap btree
58  * in this filesystem.  Done once, during mount.
59  */
60 void
61 xfs_bmap_compute_maxlevels(
62         xfs_mount_t     *mp,            /* file system mount structure */
63         int             whichfork)      /* data or attr fork */
64 {
65         int             level;          /* btree level */
66         uint            maxblocks;      /* max blocks at this level */
67         uint            maxleafents;    /* max leaf entries possible */
68         int             maxrootrecs;    /* max records in root block */
69         int             minleafrecs;    /* min records in leaf block */
70         int             minnoderecs;    /* min records in node block */
71         int             sz;             /* root block size */
72
73         /*
74          * The maximum number of extents in a file, hence the maximum
75          * number of leaf entries, is controlled by the type of di_nextents
76          * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
77          * (a signed 16-bit number, xfs_aextnum_t).
78          *
79          * Note that we can no longer assume that if we are in ATTR1 that
80          * the fork offset of all the inodes will be
81          * (xfs_default_attroffset(ip) >> 3) because we could have mounted
82          * with ATTR2 and then mounted back with ATTR1, keeping the
83          * di_forkoff's fixed but probably at various positions. Therefore,
84          * for both ATTR1 and ATTR2 we have to assume the worst case scenario
85          * of a minimum size available.
86          */
87         if (whichfork == XFS_DATA_FORK) {
88                 maxleafents = MAXEXTNUM;
89                 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
90         } else {
91                 maxleafents = MAXAEXTNUM;
92                 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
93         }
94         maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
95         minleafrecs = mp->m_bmap_dmnr[0];
96         minnoderecs = mp->m_bmap_dmnr[1];
97         maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
98         for (level = 1; maxblocks > 1; level++) {
99                 if (maxblocks <= maxrootrecs)
100                         maxblocks = 1;
101                 else
102                         maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
103         }
104         mp->m_bm_maxlevels[whichfork] = level;
105 }
106
107 STATIC int                              /* error */
108 xfs_bmbt_lookup_eq(
109         struct xfs_btree_cur    *cur,
110         xfs_fileoff_t           off,
111         xfs_fsblock_t           bno,
112         xfs_filblks_t           len,
113         int                     *stat)  /* success/failure */
114 {
115         cur->bc_rec.b.br_startoff = off;
116         cur->bc_rec.b.br_startblock = bno;
117         cur->bc_rec.b.br_blockcount = len;
118         return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
119 }
120
121 STATIC int                              /* error */
122 xfs_bmbt_lookup_ge(
123         struct xfs_btree_cur    *cur,
124         xfs_fileoff_t           off,
125         xfs_fsblock_t           bno,
126         xfs_filblks_t           len,
127         int                     *stat)  /* success/failure */
128 {
129         cur->bc_rec.b.br_startoff = off;
130         cur->bc_rec.b.br_startblock = bno;
131         cur->bc_rec.b.br_blockcount = len;
132         return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
133 }
134
135 /*
136  * Check if the inode needs to be converted to btree format.
137  */
138 static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
139 {
140         return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
141                 XFS_IFORK_NEXTENTS(ip, whichfork) >
142                         XFS_IFORK_MAXEXT(ip, whichfork);
143 }
144
145 /*
146  * Check if the inode should be converted to extent format.
147  */
148 static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
149 {
150         return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
151                 XFS_IFORK_NEXTENTS(ip, whichfork) <=
152                         XFS_IFORK_MAXEXT(ip, whichfork);
153 }
154
155 /*
156  * Update the record referred to by cur to the value given
157  * by [off, bno, len, state].
158  * This either works (return 0) or gets an EFSCORRUPTED error.
159  */
160 STATIC int
161 xfs_bmbt_update(
162         struct xfs_btree_cur    *cur,
163         xfs_fileoff_t           off,
164         xfs_fsblock_t           bno,
165         xfs_filblks_t           len,
166         xfs_exntst_t            state)
167 {
168         union xfs_btree_rec     rec;
169
170         xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
171         return xfs_btree_update(cur, &rec);
172 }
173
174 /*
175  * Compute the worst-case number of indirect blocks that will be used
176  * for ip's delayed extent of length "len".
177  */
178 STATIC xfs_filblks_t
179 xfs_bmap_worst_indlen(
180         xfs_inode_t     *ip,            /* incore inode pointer */
181         xfs_filblks_t   len)            /* delayed extent length */
182 {
183         int             level;          /* btree level number */
184         int             maxrecs;        /* maximum record count at this level */
185         xfs_mount_t     *mp;            /* mount structure */
186         xfs_filblks_t   rval;           /* return value */
187
188         mp = ip->i_mount;
189         maxrecs = mp->m_bmap_dmxr[0];
190         for (level = 0, rval = 0;
191              level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
192              level++) {
193                 len += maxrecs - 1;
194                 do_div(len, maxrecs);
195                 rval += len;
196                 if (len == 1)
197                         return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
198                                 level - 1;
199                 if (level == 0)
200                         maxrecs = mp->m_bmap_dmxr[1];
201         }
202         return rval;
203 }
204
205 /*
206  * Calculate the default attribute fork offset for newly created inodes.
207  */
208 uint
209 xfs_default_attroffset(
210         struct xfs_inode        *ip)
211 {
212         struct xfs_mount        *mp = ip->i_mount;
213         uint                    offset;
214
215         if (mp->m_sb.sb_inodesize == 256) {
216                 offset = XFS_LITINO(mp, ip->i_d.di_version) -
217                                 XFS_BMDR_SPACE_CALC(MINABTPTRS);
218         } else {
219                 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
220         }
221
222         ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
223         return offset;
224 }
225
226 /*
227  * Helper routine to reset inode di_forkoff field when switching
228  * attribute fork from local to extent format - we reset it where
229  * possible to make space available for inline data fork extents.
230  */
231 STATIC void
232 xfs_bmap_forkoff_reset(
233         xfs_inode_t     *ip,
234         int             whichfork)
235 {
236         if (whichfork == XFS_ATTR_FORK &&
237             ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
238             ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
239             ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
240                 uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
241
242                 if (dfl_forkoff > ip->i_d.di_forkoff)
243                         ip->i_d.di_forkoff = dfl_forkoff;
244         }
245 }
246
247 #ifdef DEBUG
248 STATIC struct xfs_buf *
249 xfs_bmap_get_bp(
250         struct xfs_btree_cur    *cur,
251         xfs_fsblock_t           bno)
252 {
253         struct xfs_log_item_desc *lidp;
254         int                     i;
255
256         if (!cur)
257                 return NULL;
258
259         for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
260                 if (!cur->bc_bufs[i])
261                         break;
262                 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
263                         return cur->bc_bufs[i];
264         }
265
266         /* Chase down all the log items to see if the bp is there */
267         list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
268                 struct xfs_buf_log_item *bip;
269                 bip = (struct xfs_buf_log_item *)lidp->lid_item;
270                 if (bip->bli_item.li_type == XFS_LI_BUF &&
271                     XFS_BUF_ADDR(bip->bli_buf) == bno)
272                         return bip->bli_buf;
273         }
274
275         return NULL;
276 }
277
278 STATIC void
279 xfs_check_block(
280         struct xfs_btree_block  *block,
281         xfs_mount_t             *mp,
282         int                     root,
283         short                   sz)
284 {
285         int                     i, j, dmxr;
286         __be64                  *pp, *thispa;   /* pointer to block address */
287         xfs_bmbt_key_t          *prevp, *keyp;
288
289         ASSERT(be16_to_cpu(block->bb_level) > 0);
290
291         prevp = NULL;
292         for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
293                 dmxr = mp->m_bmap_dmxr[0];
294                 keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
295
296                 if (prevp) {
297                         ASSERT(be64_to_cpu(prevp->br_startoff) <
298                                be64_to_cpu(keyp->br_startoff));
299                 }
300                 prevp = keyp;
301
302                 /*
303                  * Compare the block numbers to see if there are dups.
304                  */
305                 if (root)
306                         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
307                 else
308                         pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
309
310                 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
311                         if (root)
312                                 thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
313                         else
314                                 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
315                         if (*thispa == *pp) {
316                                 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
317                                         __func__, j, i,
318                                         (unsigned long long)be64_to_cpu(*thispa));
319                                 panic("%s: ptrs are equal in node\n",
320                                         __func__);
321                         }
322                 }
323         }
324 }
325
326 /*
327  * Check that the extents for the inode ip are in the right order in all
328  * btree leaves.
329  */
330
331 STATIC void
332 xfs_bmap_check_leaf_extents(
333         xfs_btree_cur_t         *cur,   /* btree cursor or null */
334         xfs_inode_t             *ip,            /* incore inode pointer */
335         int                     whichfork)      /* data or attr fork */
336 {
337         struct xfs_btree_block  *block; /* current btree block */
338         xfs_fsblock_t           bno;    /* block # of "block" */
339         xfs_buf_t               *bp;    /* buffer for "block" */
340         int                     error;  /* error return value */
341         xfs_extnum_t            i=0, j; /* index into the extents list */
342         xfs_ifork_t             *ifp;   /* fork structure */
343         int                     level;  /* btree level, for checking */
344         xfs_mount_t             *mp;    /* file system mount structure */
345         __be64                  *pp;    /* pointer to block address */
346         xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
347         xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
348         xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
349         int                     bp_release = 0;
350
351         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
352                 return;
353         }
354
355         bno = NULLFSBLOCK;
356         mp = ip->i_mount;
357         ifp = XFS_IFORK_PTR(ip, whichfork);
358         block = ifp->if_broot;
359         /*
360          * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
361          */
362         level = be16_to_cpu(block->bb_level);
363         ASSERT(level > 0);
364         xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
365         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
366         bno = be64_to_cpu(*pp);
367
368         ASSERT(bno != NULLFSBLOCK);
369         ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
370         ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
371
372         /*
373          * Go down the tree until leaf level is reached, following the first
374          * pointer (leftmost) at each level.
375          */
376         while (level-- > 0) {
377                 /* See if buf is in cur first */
378                 bp_release = 0;
379                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
380                 if (!bp) {
381                         bp_release = 1;
382                         error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
383                                                 XFS_BMAP_BTREE_REF,
384                                                 &xfs_bmbt_buf_ops);
385                         if (error)
386                                 goto error_norelse;
387                 }
388                 block = XFS_BUF_TO_BLOCK(bp);
389                 if (level == 0)
390                         break;
391
392                 /*
393                  * Check this block for basic sanity (increasing keys and
394                  * no duplicate blocks).
395                  */
396
397                 xfs_check_block(block, mp, 0, 0);
398                 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
399                 bno = be64_to_cpu(*pp);
400                 XFS_WANT_CORRUPTED_GOTO(mp,
401                                         XFS_FSB_SANITY_CHECK(mp, bno), error0);
402                 if (bp_release) {
403                         bp_release = 0;
404                         xfs_trans_brelse(NULL, bp);
405                 }
406         }
407
408         /*
409          * Here with bp and block set to the leftmost leaf node in the tree.
410          */
411         i = 0;
412
413         /*
414          * Loop over all leaf nodes checking that all extents are in the right order.
415          */
416         for (;;) {
417                 xfs_fsblock_t   nextbno;
418                 xfs_extnum_t    num_recs;
419
420
421                 num_recs = xfs_btree_get_numrecs(block);
422
423                 /*
424                  * Read-ahead the next leaf block, if any.
425                  */
426
427                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
428
429                 /*
430                  * Check all the extents to make sure they are OK.
431                  * If we had a previous block, the last entry should
432                  * conform with the first entry in this one.
433                  */
434
435                 ep = XFS_BMBT_REC_ADDR(mp, block, 1);
436                 if (i) {
437                         ASSERT(xfs_bmbt_disk_get_startoff(&last) +
438                                xfs_bmbt_disk_get_blockcount(&last) <=
439                                xfs_bmbt_disk_get_startoff(ep));
440                 }
441                 for (j = 1; j < num_recs; j++) {
442                         nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
443                         ASSERT(xfs_bmbt_disk_get_startoff(ep) +
444                                xfs_bmbt_disk_get_blockcount(ep) <=
445                                xfs_bmbt_disk_get_startoff(nextp));
446                         ep = nextp;
447                 }
448
449                 last = *ep;
450                 i += num_recs;
451                 if (bp_release) {
452                         bp_release = 0;
453                         xfs_trans_brelse(NULL, bp);
454                 }
455                 bno = nextbno;
456                 /*
457                  * If we've reached the end, stop.
458                  */
459                 if (bno == NULLFSBLOCK)
460                         break;
461
462                 bp_release = 0;
463                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
464                 if (!bp) {
465                         bp_release = 1;
466                         error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
467                                                 XFS_BMAP_BTREE_REF,
468                                                 &xfs_bmbt_buf_ops);
469                         if (error)
470                                 goto error_norelse;
471                 }
472                 block = XFS_BUF_TO_BLOCK(bp);
473         }
474         if (bp_release) {
475                 bp_release = 0;
476                 xfs_trans_brelse(NULL, bp);
477         }
478         return;
479
480 error0:
481         xfs_warn(mp, "%s: at error0", __func__);
482         if (bp_release)
483                 xfs_trans_brelse(NULL, bp);
484 error_norelse:
485         xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
486                 __func__, i);
487         panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
488         return;
489 }
490
491 /*
492  * Add bmap trace insert entries for all the contents of the extent records.
493  */
494 void
495 xfs_bmap_trace_exlist(
496         xfs_inode_t     *ip,            /* incore inode pointer */
497         xfs_extnum_t    cnt,            /* count of entries in the list */
498         int             whichfork,      /* data or attr fork */
499         unsigned long   caller_ip)
500 {
501         xfs_extnum_t    idx;            /* extent record index */
502         xfs_ifork_t     *ifp;           /* inode fork pointer */
503         int             state = 0;
504
505         if (whichfork == XFS_ATTR_FORK)
506                 state |= BMAP_ATTRFORK;
507
508         ifp = XFS_IFORK_PTR(ip, whichfork);
509         ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
510         for (idx = 0; idx < cnt; idx++)
511                 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
512 }
513
514 /*
515  * Validate that the bmbt_irecs being returned from bmapi are valid
516  * given the caller's original parameters.  Specifically check the
517  * ranges of the returned irecs to ensure that they only extend beyond
518  * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
519  */
520 STATIC void
521 xfs_bmap_validate_ret(
522         xfs_fileoff_t           bno,
523         xfs_filblks_t           len,
524         int                     flags,
525         xfs_bmbt_irec_t         *mval,
526         int                     nmap,
527         int                     ret_nmap)
528 {
529         int                     i;              /* index to map values */
530
531         ASSERT(ret_nmap <= nmap);
532
533         for (i = 0; i < ret_nmap; i++) {
534                 ASSERT(mval[i].br_blockcount > 0);
535                 if (!(flags & XFS_BMAPI_ENTIRE)) {
536                         ASSERT(mval[i].br_startoff >= bno);
537                         ASSERT(mval[i].br_blockcount <= len);
538                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
539                                bno + len);
540                 } else {
541                         ASSERT(mval[i].br_startoff < bno + len);
542                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
543                                bno);
544                 }
545                 ASSERT(i == 0 ||
546                        mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
547                        mval[i].br_startoff);
548                 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
549                        mval[i].br_startblock != HOLESTARTBLOCK);
550                 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
551                        mval[i].br_state == XFS_EXT_UNWRITTEN);
552         }
553 }
554
555 #else
/*
 * Without DEBUG the checkers compile away.  Both stubs expand to an empty
 * do/while so that a call is always exactly one statement and needs its
 * trailing semicolon, wherever it is used.
 */
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)         do { } while (0)
#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)    do { } while (0)
558 #endif /* DEBUG */
559
560 /*
561  * bmap free list manipulation functions
562  */
563
564 /*
565  * Add the extent to the list of extents to be free at transaction end.
566  * The list is maintained sorted (by block number).
567  */
568 void
569 xfs_bmap_add_free(
570         xfs_fsblock_t           bno,            /* fs block number of extent */
571         xfs_filblks_t           len,            /* length of extent */
572         xfs_bmap_free_t         *flist,         /* list of extents */
573         xfs_mount_t             *mp)            /* mount point structure */
574 {
575         xfs_bmap_free_item_t    *cur;           /* current (next) element */
576         xfs_bmap_free_item_t    *new;           /* new element */
577         xfs_bmap_free_item_t    *prev;          /* previous element */
578 #ifdef DEBUG
579         xfs_agnumber_t          agno;
580         xfs_agblock_t           agbno;
581
582         ASSERT(bno != NULLFSBLOCK);
583         ASSERT(len > 0);
584         ASSERT(len <= MAXEXTLEN);
585         ASSERT(!isnullstartblock(bno));
586         agno = XFS_FSB_TO_AGNO(mp, bno);
587         agbno = XFS_FSB_TO_AGBNO(mp, bno);
588         ASSERT(agno < mp->m_sb.sb_agcount);
589         ASSERT(agbno < mp->m_sb.sb_agblocks);
590         ASSERT(len < mp->m_sb.sb_agblocks);
591         ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
592 #endif
593         ASSERT(xfs_bmap_free_item_zone != NULL);
594         new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
595         new->xbfi_startblock = bno;
596         new->xbfi_blockcount = (xfs_extlen_t)len;
597         for (prev = NULL, cur = flist->xbf_first;
598              cur != NULL;
599              prev = cur, cur = cur->xbfi_next) {
600                 if (cur->xbfi_startblock >= bno)
601                         break;
602         }
603         if (prev)
604                 prev->xbfi_next = new;
605         else
606                 flist->xbf_first = new;
607         new->xbfi_next = cur;
608         flist->xbf_count++;
609 }
610
611 /*
612  * Remove the entry "free" from the free item list.  Prev points to the
613  * previous entry, unless "free" is the head of the list.
614  */
615 void
616 xfs_bmap_del_free(
617         xfs_bmap_free_t         *flist, /* free item list header */
618         xfs_bmap_free_item_t    *prev,  /* previous item on list, if any */
619         xfs_bmap_free_item_t    *free)  /* list item to be freed */
620 {
621         if (prev)
622                 prev->xbfi_next = free->xbfi_next;
623         else
624                 flist->xbf_first = free->xbfi_next;
625         flist->xbf_count--;
626         kmem_zone_free(xfs_bmap_free_item_zone, free);
627 }
628
629 /*
630  * Free up any items left in the list.
631  */
632 void
633 xfs_bmap_cancel(
634         xfs_bmap_free_t         *flist) /* list of bmap_free_items */
635 {
636         xfs_bmap_free_item_t    *free;  /* free list item */
637         xfs_bmap_free_item_t    *next;
638
639         if (flist->xbf_count == 0)
640                 return;
641         ASSERT(flist->xbf_first != NULL);
642         for (free = flist->xbf_first; free; free = next) {
643                 next = free->xbfi_next;
644                 xfs_bmap_del_free(flist, NULL, free);
645         }
646         ASSERT(flist->xbf_count == 0);
647 }
648
649 /*
650  * Inode fork format manipulation functions
651  */
652
653 /*
654  * Transform a btree format file with only one leaf node, where the
655  * extents list will fit in the inode, into an extents format file.
656  * Since the file extents are already in-core, all we have to do is
657  * give up the space for the btree root and pitch the leaf block.
658  */
659 STATIC int                              /* error */
660 xfs_bmap_btree_to_extents(
661         xfs_trans_t             *tp,    /* transaction pointer */
662         xfs_inode_t             *ip,    /* incore inode pointer */
663         xfs_btree_cur_t         *cur,   /* btree cursor */
664         int                     *logflagsp, /* inode logging flags */
665         int                     whichfork)  /* data or attr fork */
666 {
667         /* REFERENCED */
668         struct xfs_btree_block  *cblock;/* child btree block */
669         xfs_fsblock_t           cbno;   /* child block number */
670         xfs_buf_t               *cbp;   /* child block's buffer */
671         int                     error;  /* error return value */
672         xfs_ifork_t             *ifp;   /* inode fork data */
673         xfs_mount_t             *mp;    /* mount point structure */
674         __be64                  *pp;    /* ptr to block address */
675         struct xfs_btree_block  *rblock;/* root btree block */
676
677         mp = ip->i_mount;
678         ifp = XFS_IFORK_PTR(ip, whichfork);
679         ASSERT(ifp->if_flags & XFS_IFEXTENTS);
680         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
681         rblock = ifp->if_broot;
682         ASSERT(be16_to_cpu(rblock->bb_level) == 1);
683         ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
684         ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
685         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
686         cbno = be64_to_cpu(*pp);
687         *logflagsp = 0;
688 #ifdef DEBUG
689         if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
690                 return error;
691 #endif
692         error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
693                                 &xfs_bmbt_buf_ops);
694         if (error)
695                 return error;
696         cblock = XFS_BUF_TO_BLOCK(cbp);
697         if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
698                 return error;
699         xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
700         ip->i_d.di_nblocks--;
701         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
702         xfs_trans_binval(tp, cbp);
703         if (cur->bc_bufs[0] == cbp)
704                 cur->bc_bufs[0] = NULL;
705         xfs_iroot_realloc(ip, -1, whichfork);
706         ASSERT(ifp->if_broot == NULL);
707         ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
708         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
709         *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
710         return 0;
711 }
712
713 /*
714  * Convert an extents-format file into a btree-format file.
715  * The new file will have a root block (in the inode) and a single child block.
716  */
717 STATIC int                                      /* error */
718 xfs_bmap_extents_to_btree(
719         xfs_trans_t             *tp,            /* transaction pointer */
720         xfs_inode_t             *ip,            /* incore inode pointer */
721         xfs_fsblock_t           *firstblock,    /* first-block-allocated */
722         xfs_bmap_free_t         *flist,         /* blocks freed in xaction */
723         xfs_btree_cur_t         **curp,         /* cursor returned to caller */
724         int                     wasdel,         /* converting a delayed alloc */
725         int                     *logflagsp,     /* inode logging flags */
726         int                     whichfork)      /* data or attr fork */
727 {
728         struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
729         xfs_buf_t               *abp;           /* buffer for ablock */
730         xfs_alloc_arg_t         args;           /* allocation arguments */
731         xfs_bmbt_rec_t          *arp;           /* child record pointer */
732         struct xfs_btree_block  *block;         /* btree root block */
733         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
734         xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
735         int                     error;          /* error return value */
736         xfs_extnum_t            i, cnt;         /* extent record index */
737         xfs_ifork_t             *ifp;           /* inode fork pointer */
738         xfs_bmbt_key_t          *kp;            /* root block key pointer */
739         xfs_mount_t             *mp;            /* mount structure */
740         xfs_extnum_t            nextents;       /* number of file extents */
741         xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
742
743         mp = ip->i_mount;
744         ifp = XFS_IFORK_PTR(ip, whichfork);
745         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
746
747         /*
748          * Make space in the inode incore.
749          */
750         xfs_iroot_realloc(ip, 1, whichfork);
751         ifp->if_flags |= XFS_IFBROOT;
752
753         /*
754          * Fill in the root.
755          */
756         block = ifp->if_broot;
757         if (xfs_sb_version_hascrc(&mp->m_sb))
758                 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
759                                  XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
760                                  XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
761         else
762                 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
763                                  XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
764                                  XFS_BTREE_LONG_PTRS);
765
766         /*
767          * Need a cursor.  Can't allocate until bb_level is filled in.
768          */
769         cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
770         cur->bc_private.b.firstblock = *firstblock;
771         cur->bc_private.b.flist = flist;
772         cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
773         /*
774          * Convert to a btree with two levels, one record in root.
775          */
776         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
777         memset(&args, 0, sizeof(args));
778         args.tp = tp;
779         args.mp = mp;
780         args.firstblock = *firstblock;
781         if (*firstblock == NULLFSBLOCK) {
782                 args.type = XFS_ALLOCTYPE_START_BNO;
783                 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
784         } else if (flist->xbf_low) {
785                 args.type = XFS_ALLOCTYPE_START_BNO;
786                 args.fsbno = *firstblock;
787         } else {
788                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
789                 args.fsbno = *firstblock;
790         }
791         args.minlen = args.maxlen = args.prod = 1;
792         args.wasdel = wasdel;
793         *logflagsp = 0;
794         if ((error = xfs_alloc_vextent(&args))) {
795                 xfs_iroot_realloc(ip, -1, whichfork);
796                 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
797                 return error;
798         }
799         /*
800          * Allocation can't fail, the space was reserved.
801          */
802         ASSERT(args.fsbno != NULLFSBLOCK);
803         ASSERT(*firstblock == NULLFSBLOCK ||
804                args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
805                (flist->xbf_low &&
806                 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
807         *firstblock = cur->bc_private.b.firstblock = args.fsbno;
808         cur->bc_private.b.allocated++;
809         ip->i_d.di_nblocks++;
810         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
811         abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
812         /*
813          * Fill in the child block.
814          */
815         abp->b_ops = &xfs_bmbt_buf_ops;
816         ablock = XFS_BUF_TO_BLOCK(abp);
817         if (xfs_sb_version_hascrc(&mp->m_sb))
818                 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
819                                 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
820                                 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
821         else
822                 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
823                                 XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
824                                 XFS_BTREE_LONG_PTRS);
825
826         arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
827         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
828         for (cnt = i = 0; i < nextents; i++) {
829                 ep = xfs_iext_get_ext(ifp, i);
830                 if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
831                         arp->l0 = cpu_to_be64(ep->l0);
832                         arp->l1 = cpu_to_be64(ep->l1);
833                         arp++; cnt++;
834                 }
835         }
836         ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
837         xfs_btree_set_numrecs(ablock, cnt);
838
839         /*
840          * Fill in the root key and pointer.
841          */
842         kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
843         arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
844         kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
845         pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
846                                                 be16_to_cpu(block->bb_level)));
847         *pp = cpu_to_be64(args.fsbno);
848
849         /*
850          * Do all this logging at the end so that
851          * the root is at the right level.
852          */
853         xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
854         xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
855         ASSERT(*curp == NULL);
856         *curp = cur;
857         *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
858         return 0;
859 }
860
861 /*
862  * Convert a local file to an extents file.
863  * This code is out of bounds for data forks of regular files,
864  * since the file data needs to get logged so things will stay consistent.
865  * (The bmap-level manipulations are ok, though).
866  */
867 void
868 xfs_bmap_local_to_extents_empty(
869         struct xfs_inode        *ip,
870         int                     whichfork)
871 {
872         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
873
874         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
875         ASSERT(ifp->if_bytes == 0);
876         ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
877
878         xfs_bmap_forkoff_reset(ip, whichfork);
879         ifp->if_flags &= ~XFS_IFINLINE;
880         ifp->if_flags |= XFS_IFEXTENTS;
881         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
882 }
883
884
885 STATIC int                              /* error */
886 xfs_bmap_local_to_extents(
887         xfs_trans_t     *tp,            /* transaction pointer */
888         xfs_inode_t     *ip,            /* incore inode pointer */
889         xfs_fsblock_t   *firstblock,    /* first block allocated in xaction */
890         xfs_extlen_t    total,          /* total blocks needed by transaction */
891         int             *logflagsp,     /* inode logging flags */
892         int             whichfork,
893         void            (*init_fn)(struct xfs_trans *tp,
894                                    struct xfs_buf *bp,
895                                    struct xfs_inode *ip,
896                                    struct xfs_ifork *ifp))
897 {
898         int             error = 0;
899         int             flags;          /* logging flags returned */
900         xfs_ifork_t     *ifp;           /* inode fork pointer */
901         xfs_alloc_arg_t args;           /* allocation arguments */
902         xfs_buf_t       *bp;            /* buffer for extent block */
903         xfs_bmbt_rec_host_t *ep;        /* extent record pointer */
904
905         /*
906          * We don't want to deal with the case of keeping inode data inline yet.
907          * So sending the data fork of a regular inode is invalid.
908          */
909         ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
910         ifp = XFS_IFORK_PTR(ip, whichfork);
911         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
912
913         if (!ifp->if_bytes) {
914                 xfs_bmap_local_to_extents_empty(ip, whichfork);
915                 flags = XFS_ILOG_CORE;
916                 goto done;
917         }
918
919         flags = 0;
920         error = 0;
921         ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
922                                                                 XFS_IFINLINE);
923         memset(&args, 0, sizeof(args));
924         args.tp = tp;
925         args.mp = ip->i_mount;
926         args.firstblock = *firstblock;
927         /*
928          * Allocate a block.  We know we need only one, since the
929          * file currently fits in an inode.
930          */
931         if (*firstblock == NULLFSBLOCK) {
932                 args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
933                 args.type = XFS_ALLOCTYPE_START_BNO;
934         } else {
935                 args.fsbno = *firstblock;
936                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
937         }
938         args.total = total;
939         args.minlen = args.maxlen = args.prod = 1;
940         error = xfs_alloc_vextent(&args);
941         if (error)
942                 goto done;
943
944         /* Can't fail, the space was reserved. */
945         ASSERT(args.fsbno != NULLFSBLOCK);
946         ASSERT(args.len == 1);
947         *firstblock = args.fsbno;
948         bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
949
950         /*
951          * Initialize the block, copy the data and log the remote buffer.
952          *
953          * The callout is responsible for logging because the remote format
954          * might differ from the local format and thus we don't know how much to
955          * log here. Note that init_fn must also set the buffer log item type
956          * correctly.
957          */
958         init_fn(tp, bp, ip, ifp);
959
960         /* account for the change in fork size */
961         xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
962         xfs_bmap_local_to_extents_empty(ip, whichfork);
963         flags |= XFS_ILOG_CORE;
964
965         xfs_iext_add(ifp, 0, 1);
966         ep = xfs_iext_get_ext(ifp, 0);
967         xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
968         trace_xfs_bmap_post_update(ip, 0,
969                         whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
970                         _THIS_IP_);
971         XFS_IFORK_NEXT_SET(ip, whichfork, 1);
972         ip->i_d.di_nblocks = 1;
973         xfs_trans_mod_dquot_byino(tp, ip,
974                 XFS_TRANS_DQ_BCOUNT, 1L);
975         flags |= xfs_ilog_fext(whichfork);
976
977 done:
978         *logflagsp = flags;
979         return error;
980 }
981
982 /*
983  * Called from xfs_bmap_add_attrfork to handle btree format files.
984  */
985 STATIC int                                      /* error */
986 xfs_bmap_add_attrfork_btree(
987         xfs_trans_t             *tp,            /* transaction pointer */
988         xfs_inode_t             *ip,            /* incore inode pointer */
989         xfs_fsblock_t           *firstblock,    /* first block allocated */
990         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
991         int                     *flags)         /* inode logging flags */
992 {
993         xfs_btree_cur_t         *cur;           /* btree cursor */
994         int                     error;          /* error return value */
995         xfs_mount_t             *mp;            /* file system mount struct */
996         int                     stat;           /* newroot status */
997
998         mp = ip->i_mount;
999         if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
1000                 *flags |= XFS_ILOG_DBROOT;
1001         else {
1002                 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
1003                 cur->bc_private.b.flist = flist;
1004                 cur->bc_private.b.firstblock = *firstblock;
1005                 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
1006                         goto error0;
1007                 /* must be at least one entry */
1008                 XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
1009                 if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
1010                         goto error0;
1011                 if (stat == 0) {
1012                         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1013                         return -ENOSPC;
1014                 }
1015                 *firstblock = cur->bc_private.b.firstblock;
1016                 cur->bc_private.b.allocated = 0;
1017                 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1018         }
1019         return 0;
1020 error0:
1021         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1022         return error;
1023 }
1024
1025 /*
1026  * Called from xfs_bmap_add_attrfork to handle extents format files.
1027  */
1028 STATIC int                                      /* error */
1029 xfs_bmap_add_attrfork_extents(
1030         xfs_trans_t             *tp,            /* transaction pointer */
1031         xfs_inode_t             *ip,            /* incore inode pointer */
1032         xfs_fsblock_t           *firstblock,    /* first block allocated */
1033         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
1034         int                     *flags)         /* inode logging flags */
1035 {
1036         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
1037         int                     error;          /* error return value */
1038
1039         if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
1040                 return 0;
1041         cur = NULL;
1042         error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
1043                 flags, XFS_DATA_FORK);
1044         if (cur) {
1045                 cur->bc_private.b.allocated = 0;
1046                 xfs_btree_del_cursor(cur,
1047                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
1048         }
1049         return error;
1050 }
1051
1052 /*
1053  * Called from xfs_bmap_add_attrfork to handle local format files. Each
1054  * different data fork content type needs a different callout to do the
1055  * conversion. Some are basic and only require special block initialisation
1056  * callouts for the data formating, others (directories) are so specialised they
1057  * handle everything themselves.
1058  *
1059  * XXX (dgc): investigate whether directory conversion can use the generic
1060  * formatting callout. It should be possible - it's just a very complex
1061  * formatter.
1062  */
1063 STATIC int                                      /* error */
1064 xfs_bmap_add_attrfork_local(
1065         xfs_trans_t             *tp,            /* transaction pointer */
1066         xfs_inode_t             *ip,            /* incore inode pointer */
1067         xfs_fsblock_t           *firstblock,    /* first block allocated */
1068         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
1069         int                     *flags)         /* inode logging flags */
1070 {
1071         xfs_da_args_t           dargs;          /* args for dir/attr code */
1072
1073         if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1074                 return 0;
1075
1076         if (S_ISDIR(ip->i_d.di_mode)) {
1077                 memset(&dargs, 0, sizeof(dargs));
1078                 dargs.geo = ip->i_mount->m_dir_geo;
1079                 dargs.dp = ip;
1080                 dargs.firstblock = firstblock;
1081                 dargs.flist = flist;
1082                 dargs.total = dargs.geo->fsbcount;
1083                 dargs.whichfork = XFS_DATA_FORK;
1084                 dargs.trans = tp;
1085                 return xfs_dir2_sf_to_block(&dargs);
1086         }
1087
1088         if (S_ISLNK(ip->i_d.di_mode))
1089                 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
1090                                                  flags, XFS_DATA_FORK,
1091                                                  xfs_symlink_local_to_remote);
1092
1093         /* should only be called for types that support local format data */
1094         ASSERT(0);
1095         return -EFSCORRUPTED;
1096 }
1097
1098 /*
1099  * Convert inode from non-attributed to attributed.
1100  * Must not be in a transaction, ip must not be locked.
1101  */
1102 int                                             /* error code */
1103 xfs_bmap_add_attrfork(
1104         xfs_inode_t             *ip,            /* incore inode pointer */
1105         int                     size,           /* space new attribute needs */
1106         int                     rsvd)           /* xact may use reserved blks */
1107 {
1108         xfs_fsblock_t           firstblock;     /* 1st block/ag allocated */
1109         xfs_bmap_free_t         flist;          /* freed extent records */
1110         xfs_mount_t             *mp;            /* mount structure */
1111         xfs_trans_t             *tp;            /* transaction pointer */
1112         int                     blks;           /* space reservation */
1113         int                     version = 1;    /* superblock attr version */
1114         int                     committed;      /* xaction was committed */
1115         int                     logflags;       /* logging flags */
1116         int                     error;          /* error return value */
1117
1118         ASSERT(XFS_IFORK_Q(ip) == 0);
1119
1120         mp = ip->i_mount;
1121         ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1122         tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
1123         blks = XFS_ADDAFORK_SPACE_RES(mp);
1124         if (rsvd)
1125                 tp->t_flags |= XFS_TRANS_RESERVE;
1126         error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
1127         if (error) {
1128                 xfs_trans_cancel(tp);
1129                 return error;
1130         }
1131         xfs_ilock(ip, XFS_ILOCK_EXCL);
1132         error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1133                         XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
1134                         XFS_QMOPT_RES_REGBLKS);
1135         if (error)
1136                 goto trans_cancel;
1137         if (XFS_IFORK_Q(ip))
1138                 goto trans_cancel;
1139         if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
1140                 /*
1141                  * For inodes coming from pre-6.2 filesystems.
1142                  */
1143                 ASSERT(ip->i_d.di_aformat == 0);
1144                 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
1145         }
1146         ASSERT(ip->i_d.di_anextents == 0);
1147
1148         xfs_trans_ijoin(tp, ip, 0);
1149         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1150
1151         switch (ip->i_d.di_format) {
1152         case XFS_DINODE_FMT_DEV:
1153                 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1154                 break;
1155         case XFS_DINODE_FMT_UUID:
1156                 ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
1157                 break;
1158         case XFS_DINODE_FMT_LOCAL:
1159         case XFS_DINODE_FMT_EXTENTS:
1160         case XFS_DINODE_FMT_BTREE:
1161                 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1162                 if (!ip->i_d.di_forkoff)
1163                         ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
1164                 else if (mp->m_flags & XFS_MOUNT_ATTR2)
1165                         version = 2;
1166                 break;
1167         default:
1168                 ASSERT(0);
1169                 error = -EINVAL;
1170                 goto trans_cancel;
1171         }
1172
1173         ASSERT(ip->i_afp == NULL);
1174         ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
1175         ip->i_afp->if_flags = XFS_IFEXTENTS;
1176         logflags = 0;
1177         xfs_bmap_init(&flist, &firstblock);
1178         switch (ip->i_d.di_format) {
1179         case XFS_DINODE_FMT_LOCAL:
1180                 error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
1181                         &logflags);
1182                 break;
1183         case XFS_DINODE_FMT_EXTENTS:
1184                 error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
1185                         &flist, &logflags);
1186                 break;
1187         case XFS_DINODE_FMT_BTREE:
1188                 error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
1189                         &logflags);
1190                 break;
1191         default:
1192                 error = 0;
1193                 break;
1194         }
1195         if (logflags)
1196                 xfs_trans_log_inode(tp, ip, logflags);
1197         if (error)
1198                 goto bmap_cancel;
1199         if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1200            (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1201                 bool log_sb = false;
1202
1203                 spin_lock(&mp->m_sb_lock);
1204                 if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1205                         xfs_sb_version_addattr(&mp->m_sb);
1206                         log_sb = true;
1207                 }
1208                 if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1209                         xfs_sb_version_addattr2(&mp->m_sb);
1210                         log_sb = true;
1211                 }
1212                 spin_unlock(&mp->m_sb_lock);
1213                 if (log_sb)
1214                         xfs_log_sb(tp);
1215         }
1216
1217         error = xfs_bmap_finish(&tp, &flist, &committed);
1218         if (error)
1219                 goto bmap_cancel;
1220         error = xfs_trans_commit(tp);
1221         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1222         return error;
1223
1224 bmap_cancel:
1225         xfs_bmap_cancel(&flist);
1226 trans_cancel:
1227         xfs_trans_cancel(tp);
1228         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1229         return error;
1230 }
1231
1232 /*
1233  * Internal and external extent tree search functions.
1234  */
1235
1236 /*
1237  * Read in the extents to if_extents.
1238  * All inode fields are set up by caller, we just traverse the btree
1239  * and copy the records in. If the file system cannot contain unwritten
1240  * extents, the records are checked for no "state" flags.
1241  */
1242 int                                     /* error */
1243 xfs_bmap_read_extents(
1244         xfs_trans_t             *tp,    /* transaction pointer */
1245         xfs_inode_t             *ip,    /* incore inode */
1246         int                     whichfork) /* data or attr fork */
1247 {
1248         struct xfs_btree_block  *block; /* current btree block */
1249         xfs_fsblock_t           bno;    /* block # of "block" */
1250         xfs_buf_t               *bp;    /* buffer for "block" */
1251         int                     error;  /* error return value */
1252         xfs_exntfmt_t           exntf;  /* XFS_EXTFMT_NOSTATE, if checking */
1253         xfs_extnum_t            i, j;   /* index into the extents list */
1254         xfs_ifork_t             *ifp;   /* fork structure */
1255         int                     level;  /* btree level, for checking */
1256         xfs_mount_t             *mp;    /* file system mount structure */
1257         __be64                  *pp;    /* pointer to block address */
1258         /* REFERENCED */
1259         xfs_extnum_t            room;   /* number of entries there's room for */
1260
1261         bno = NULLFSBLOCK;
1262         mp = ip->i_mount;
1263         ifp = XFS_IFORK_PTR(ip, whichfork);
1264         exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
1265                                         XFS_EXTFMT_INODE(ip);
1266         block = ifp->if_broot;
1267         /*
1268          * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
1269          */
1270         level = be16_to_cpu(block->bb_level);
1271         ASSERT(level > 0);
1272         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
1273         bno = be64_to_cpu(*pp);
1274         ASSERT(bno != NULLFSBLOCK);
1275         ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
1276         ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
1277         /*
1278          * Go down the tree until leaf level is reached, following the first
1279          * pointer (leftmost) at each level.
1280          */
1281         while (level-- > 0) {
1282                 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
1283                                 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1284                 if (error)
1285                         return error;
1286                 block = XFS_BUF_TO_BLOCK(bp);
1287                 if (level == 0)
1288                         break;
1289                 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
1290                 bno = be64_to_cpu(*pp);
1291                 XFS_WANT_CORRUPTED_GOTO(mp,
1292                         XFS_FSB_SANITY_CHECK(mp, bno), error0);
1293                 xfs_trans_brelse(tp, bp);
1294         }
1295         /*
1296          * Here with bp and block set to the leftmost leaf node in the tree.
1297          */
1298         room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1299         i = 0;
1300         /*
1301          * Loop over all leaf nodes.  Copy information to the extent records.
1302          */
1303         for (;;) {
1304                 xfs_bmbt_rec_t  *frp;
1305                 xfs_fsblock_t   nextbno;
1306                 xfs_extnum_t    num_recs;
1307                 xfs_extnum_t    start;
1308
1309                 num_recs = xfs_btree_get_numrecs(block);
1310                 if (unlikely(i + num_recs > room)) {
1311                         ASSERT(i + num_recs <= room);
1312                         xfs_warn(ip->i_mount,
1313                                 "corrupt dinode %Lu, (btree extents).",
1314                                 (unsigned long long) ip->i_ino);
1315                         XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
1316                                 XFS_ERRLEVEL_LOW, ip->i_mount, block);
1317                         goto error0;
1318                 }
1319                 /*
1320                  * Read-ahead the next leaf block, if any.
1321                  */
1322                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
1323                 if (nextbno != NULLFSBLOCK)
1324                         xfs_btree_reada_bufl(mp, nextbno, 1,
1325                                              &xfs_bmbt_buf_ops);
1326                 /*
1327                  * Copy records into the extent records.
1328                  */
1329                 frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1330                 start = i;
1331                 for (j = 0; j < num_recs; j++, i++, frp++) {
1332                         xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
1333                         trp->l0 = be64_to_cpu(frp->l0);
1334                         trp->l1 = be64_to_cpu(frp->l1);
1335                 }
1336                 if (exntf == XFS_EXTFMT_NOSTATE) {
1337                         /*
1338                          * Check all attribute bmap btree records and
1339                          * any "older" data bmap btree records for a
1340                          * set bit in the "extent flag" position.
1341                          */
1342                         if (unlikely(xfs_check_nostate_extents(ifp,
1343                                         start, num_recs))) {
1344                                 XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
1345                                                  XFS_ERRLEVEL_LOW,
1346                                                  ip->i_mount);
1347                                 goto error0;
1348                         }
1349                 }
1350                 xfs_trans_brelse(tp, bp);
1351                 bno = nextbno;
1352                 /*
1353                  * If we've reached the end, stop.
1354                  */
1355                 if (bno == NULLFSBLOCK)
1356                         break;
1357                 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
1358                                 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1359                 if (error)
1360                         return error;
1361                 block = XFS_BUF_TO_BLOCK(bp);
1362         }
1363         ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
1364         ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
1365         XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
1366         return 0;
1367 error0:
1368         xfs_trans_brelse(tp, bp);
1369         return -EFSCORRUPTED;
1370 }
1371
1372
1373 /*
1374  * Search the extent records for the entry containing block bno.
1375  * If bno lies in a hole, point to the next entry.  If bno lies
1376  * past eof, *eofp will be set, and *prevp will contain the last
1377  * entry (null if none).  Else, *lastxp will be set to the index
1378  * of the found entry; *gotp will contain the entry.
1379  */
1380 STATIC xfs_bmbt_rec_host_t *            /* pointer to found extent entry */
1381 xfs_bmap_search_multi_extents(
1382         xfs_ifork_t     *ifp,           /* inode fork pointer */
1383         xfs_fileoff_t   bno,            /* block number searched for */
1384         int             *eofp,          /* out: end of file found */
1385         xfs_extnum_t    *lastxp,        /* out: last extent index */
1386         xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
1387         xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
1388 {
1389         xfs_bmbt_rec_host_t *ep;                /* extent record pointer */
1390         xfs_extnum_t    lastx;          /* last extent index */
1391
1392         /*
1393          * Initialize the extent entry structure to catch access to
1394          * uninitialized br_startblock field.
1395          */
1396         gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
1397         gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
1398         gotp->br_state = XFS_EXT_INVALID;
1399         gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
1400         prevp->br_startoff = NULLFILEOFF;
1401
1402         ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
1403         if (lastx > 0) {
1404                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
1405         }
1406         if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1407                 xfs_bmbt_get_all(ep, gotp);
1408                 *eofp = 0;
1409         } else {
1410                 if (lastx > 0) {
1411                         *gotp = *prevp;
1412                 }
1413                 *eofp = 1;
1414                 ep = NULL;
1415         }
1416         *lastxp = lastx;
1417         return ep;
1418 }
1419
1420 /*
1421  * Search the extents list for the inode, for the extent containing bno.
1422  * If bno lies in a hole, point to the next entry.  If bno lies past eof,
1423  * *eofp will be set, and *prevp will contain the last entry (null if none).
1424  * Else, *lastxp will be set to the index of the found
1425  * entry; *gotp will contain the entry.
1426  */
1427 STATIC xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
1428 xfs_bmap_search_extents(
1429         xfs_inode_t     *ip,            /* incore inode pointer */
1430         xfs_fileoff_t   bno,            /* block number searched for */
1431         int             fork,           /* data or attr fork */
1432         int             *eofp,          /* out: end of file found */
1433         xfs_extnum_t    *lastxp,        /* out: last extent index */
1434         xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
1435         xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
1436 {
1437         xfs_ifork_t     *ifp;           /* inode fork pointer */
1438         xfs_bmbt_rec_host_t  *ep;            /* extent record pointer */
1439
1440         XFS_STATS_INC(ip->i_mount, xs_look_exlist);
1441         ifp = XFS_IFORK_PTR(ip, fork);
1442
1443         ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
1444
1445         if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
1446                      !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
1447                 xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
1448                                 "Access to block zero in inode %llu "
1449                                 "start_block: %llx start_off: %llx "
1450                                 "blkcnt: %llx extent-state: %x lastx: %x",
1451                         (unsigned long long)ip->i_ino,
1452                         (unsigned long long)gotp->br_startblock,
1453                         (unsigned long long)gotp->br_startoff,
1454                         (unsigned long long)gotp->br_blockcount,
1455                         gotp->br_state, *lastxp);
1456                 *lastxp = NULLEXTNUM;
1457                 *eofp = 1;
1458                 return NULL;
1459         }
1460         return ep;
1461 }
1462
1463 /*
1464  * Returns the file-relative block number of the first unused block(s)
1465  * in the file with at least "len" logically contiguous blocks free.
1466  * This is the lowest-address hole if the file has holes, else the first block
1467  * past the end of file.
1468  * Return 0 if the file is currently local (in-inode).
1469  */
1470 int                                             /* error */
1471 xfs_bmap_first_unused(
1472         xfs_trans_t     *tp,                    /* transaction pointer */
1473         xfs_inode_t     *ip,                    /* incore inode */
1474         xfs_extlen_t    len,                    /* size of hole to find */
1475         xfs_fileoff_t   *first_unused,          /* unused block */
1476         int             whichfork)              /* data or attr fork */
1477 {
1478         int             error;                  /* error return value */
1479         int             idx;                    /* extent record index */
1480         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1481         xfs_fileoff_t   lastaddr;               /* last block number seen */
1482         xfs_fileoff_t   lowest;                 /* lowest useful block */
1483         xfs_fileoff_t   max;                    /* starting useful block */
1484         xfs_fileoff_t   off;                    /* offset for this block */
1485         xfs_extnum_t    nextents;               /* number of extent entries */
1486
1487         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
1488                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
1489                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1490         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1491                 *first_unused = 0;
1492                 return 0;
1493         }
1494         ifp = XFS_IFORK_PTR(ip, whichfork);
1495         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1496             (error = xfs_iread_extents(tp, ip, whichfork)))
1497                 return error;
1498         lowest = *first_unused;
1499         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1500         for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
1501                 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
1502                 off = xfs_bmbt_get_startoff(ep);
1503                 /*
1504                  * See if the hole before this extent will work.
1505                  */
1506                 if (off >= lowest + len && off - max >= len) {
1507                         *first_unused = max;
1508                         return 0;
1509                 }
1510                 lastaddr = off + xfs_bmbt_get_blockcount(ep);
1511                 max = XFS_FILEOFF_MAX(lastaddr, lowest);
1512         }
1513         *first_unused = max;
1514         return 0;
1515 }
1516
1517 /*
1518  * Returns the file-relative block number of the last block - 1 before
1519  * last_block (input value) in the file.
1520  * This is not based on i_size, it is based on the extent records.
1521  * Returns 0 for local files, as they do not have extent records.
1522  */
1523 int                                             /* error */
1524 xfs_bmap_last_before(
1525         xfs_trans_t     *tp,                    /* transaction pointer */
1526         xfs_inode_t     *ip,                    /* incore inode */
1527         xfs_fileoff_t   *last_block,            /* last block */
1528         int             whichfork)              /* data or attr fork */
1529 {
1530         xfs_fileoff_t   bno;                    /* input file offset */
1531         int             eof;                    /* hit end of file */
1532         xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
1533         int             error;                  /* error return value */
1534         xfs_bmbt_irec_t got;                    /* current extent value */
1535         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1536         xfs_extnum_t    lastx;                  /* last extent used */
1537         xfs_bmbt_irec_t prev;                   /* previous extent value */
1538
1539         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1540             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
1541             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
1542                return -EIO;
1543         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1544                 *last_block = 0;
1545                 return 0;
1546         }
1547         ifp = XFS_IFORK_PTR(ip, whichfork);
1548         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1549             (error = xfs_iread_extents(tp, ip, whichfork)))
1550                 return error;
1551         bno = *last_block - 1;
1552         ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
1553                 &prev);
1554         if (eof || xfs_bmbt_get_startoff(ep) > bno) {
1555                 if (prev.br_startoff == NULLFILEOFF)
1556                         *last_block = 0;
1557                 else
1558                         *last_block = prev.br_startoff + prev.br_blockcount;
1559         }
1560         /*
1561          * Otherwise *last_block is already the right answer.
1562          */
1563         return 0;
1564 }
1565
1566 int
1567 xfs_bmap_last_extent(
1568         struct xfs_trans        *tp,
1569         struct xfs_inode        *ip,
1570         int                     whichfork,
1571         struct xfs_bmbt_irec    *rec,
1572         int                     *is_empty)
1573 {
1574         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1575         int                     error;
1576         int                     nextents;
1577
1578         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1579                 error = xfs_iread_extents(tp, ip, whichfork);
1580                 if (error)
1581                         return error;
1582         }
1583
1584         nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
1585         if (nextents == 0) {
1586                 *is_empty = 1;
1587                 return 0;
1588         }
1589
1590         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
1591         *is_empty = 0;
1592         return 0;
1593 }
1594
1595 /*
1596  * Check the last inode extent to determine whether this allocation will result
1597  * in blocks being allocated at the end of the file. When we allocate new data
1598  * blocks at the end of the file which do not start at the previous data block,
1599  * we will try to align the new blocks at stripe unit boundaries.
1600  *
1601  * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1602  * at, or past the EOF.
1603  */
1604 STATIC int
1605 xfs_bmap_isaeof(
1606         struct xfs_bmalloca     *bma,
1607         int                     whichfork)
1608 {
1609         struct xfs_bmbt_irec    rec;
1610         int                     is_empty;
1611         int                     error;
1612
1613         bma->aeof = 0;
1614         error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1615                                      &is_empty);
1616         if (error)
1617                 return error;
1618
1619         if (is_empty) {
1620                 bma->aeof = 1;
1621                 return 0;
1622         }
1623
1624         /*
1625          * Check if we are allocation or past the last extent, or at least into
1626          * the last delayed allocated extent.
1627          */
1628         bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1629                 (bma->offset >= rec.br_startoff &&
1630                  isnullstartblock(rec.br_startblock));
1631         return 0;
1632 }
1633
1634 /*
1635  * Returns the file-relative block number of the first block past eof in
1636  * the file.  This is not based on i_size, it is based on the extent records.
1637  * Returns 0 for local files, as they do not have extent records.
1638  */
1639 int
1640 xfs_bmap_last_offset(
1641         struct xfs_inode        *ip,
1642         xfs_fileoff_t           *last_block,
1643         int                     whichfork)
1644 {
1645         struct xfs_bmbt_irec    rec;
1646         int                     is_empty;
1647         int                     error;
1648
1649         *last_block = 0;
1650
1651         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1652                 return 0;
1653
1654         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1655             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1656                return -EIO;
1657
1658         error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1659         if (error || is_empty)
1660                 return error;
1661
1662         *last_block = rec.br_startoff + rec.br_blockcount;
1663         return 0;
1664 }
1665
1666 /*
1667  * Returns whether the selected fork of the inode has exactly one
1668  * block or not.  For the data fork we check this matches di_size,
1669  * implying the file's range is 0..bsize-1.
1670  */
1671 int                                     /* 1=>1 block, 0=>otherwise */
1672 xfs_bmap_one_block(
1673         xfs_inode_t     *ip,            /* incore inode */
1674         int             whichfork)      /* data or attr fork */
1675 {
1676         xfs_bmbt_rec_host_t *ep;        /* ptr to fork's extent */
1677         xfs_ifork_t     *ifp;           /* inode fork pointer */
1678         int             rval;           /* return value */
1679         xfs_bmbt_irec_t s;              /* internal version of extent */
1680
1681 #ifndef DEBUG
1682         if (whichfork == XFS_DATA_FORK)
1683                 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1684 #endif  /* !DEBUG */
1685         if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
1686                 return 0;
1687         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1688                 return 0;
1689         ifp = XFS_IFORK_PTR(ip, whichfork);
1690         ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1691         ep = xfs_iext_get_ext(ifp, 0);
1692         xfs_bmbt_get_all(ep, &s);
1693         rval = s.br_startoff == 0 && s.br_blockcount == 1;
1694         if (rval && whichfork == XFS_DATA_FORK)
1695                 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1696         return rval;
1697 }
1698
1699 /*
1700  * Extent tree manipulation functions used during allocation.
1701  */
1702
1703 /*
1704  * Convert a delayed allocation to a real allocation.
1705  */
1706 STATIC int                              /* error */
1707 xfs_bmap_add_extent_delay_real(
1708         struct xfs_bmalloca     *bma)
1709 {
1710         struct xfs_bmbt_irec    *new = &bma->got;
1711         int                     diff;   /* temp value */
1712         xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
1713         int                     error;  /* error return value */
1714         int                     i;      /* temp state */
1715         xfs_ifork_t             *ifp;   /* inode fork pointer */
1716         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1717         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1718                                         /* left is 0, right is 1, prev is 2 */
1719         int                     rval=0; /* return value (logging flags) */
1720         int                     state = 0;/* state bits, accessed thru macros */
1721         xfs_filblks_t           da_new; /* new count del alloc blocks used */
1722         xfs_filblks_t           da_old; /* old count del alloc blocks used */
1723         xfs_filblks_t           temp=0; /* value for da_new calculations */
1724         xfs_filblks_t           temp2=0;/* value for da_new calculations */
1725         int                     tmp_rval;       /* partial logging flags */
1726         struct xfs_mount        *mp;
1727
1728         mp  = bma->tp ? bma->tp->t_mountp : NULL;
1729         ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
1730
1731         ASSERT(bma->idx >= 0);
1732         ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
1733         ASSERT(!isnullstartblock(new->br_startblock));
1734         ASSERT(!bma->cur ||
1735                (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
1736
1737         XFS_STATS_INC(mp, xs_add_exlist);
1738
1739 #define LEFT            r[0]
1740 #define RIGHT           r[1]
1741 #define PREV            r[2]
1742
1743         /*
1744          * Set up a bunch of variables to make the tests simpler.
1745          */
1746         ep = xfs_iext_get_ext(ifp, bma->idx);
1747         xfs_bmbt_get_all(ep, &PREV);
1748         new_endoff = new->br_startoff + new->br_blockcount;
1749         ASSERT(PREV.br_startoff <= new->br_startoff);
1750         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1751
1752         da_old = startblockval(PREV.br_startblock);
1753         da_new = 0;
1754
1755         /*
1756          * Set flags determining what part of the previous delayed allocation
1757          * extent is being replaced by a real allocation.
1758          */
1759         if (PREV.br_startoff == new->br_startoff)
1760                 state |= BMAP_LEFT_FILLING;
1761         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1762                 state |= BMAP_RIGHT_FILLING;
1763
1764         /*
1765          * Check and set flags if this segment has a left neighbor.
1766          * Don't set contiguous if the combined extent would be too large.
1767          */
1768         if (bma->idx > 0) {
1769                 state |= BMAP_LEFT_VALID;
1770                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
1771
1772                 if (isnullstartblock(LEFT.br_startblock))
1773                         state |= BMAP_LEFT_DELAY;
1774         }
1775
1776         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1777             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1778             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1779             LEFT.br_state == new->br_state &&
1780             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1781                 state |= BMAP_LEFT_CONTIG;
1782
1783         /*
1784          * Check and set flags if this segment has a right neighbor.
1785          * Don't set contiguous if the combined extent would be too large.
1786          * Also check for all-three-contiguous being too large.
1787          */
1788         if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
1789                 state |= BMAP_RIGHT_VALID;
1790                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
1791
1792                 if (isnullstartblock(RIGHT.br_startblock))
1793                         state |= BMAP_RIGHT_DELAY;
1794         }
1795
1796         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1797             new_endoff == RIGHT.br_startoff &&
1798             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1799             new->br_state == RIGHT.br_state &&
1800             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1801             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1802                        BMAP_RIGHT_FILLING)) !=
1803                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1804                        BMAP_RIGHT_FILLING) ||
1805              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1806                         <= MAXEXTLEN))
1807                 state |= BMAP_RIGHT_CONTIG;
1808
1809         error = 0;
1810         /*
1811          * Switch out based on the FILLING and CONTIG state bits.
1812          */
1813         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1814                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1815         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1816              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1817                 /*
1818                  * Filling in all of a previously delayed allocation extent.
1819                  * The left and right neighbors are both contiguous with new.
1820                  */
1821                 bma->idx--;
1822                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1823                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1824                         LEFT.br_blockcount + PREV.br_blockcount +
1825                         RIGHT.br_blockcount);
1826                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1827
1828                 xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
1829                 bma->ip->i_d.di_nextents--;
1830                 if (bma->cur == NULL)
1831                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1832                 else {
1833                         rval = XFS_ILOG_CORE;
1834                         error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1835                                         RIGHT.br_startblock,
1836                                         RIGHT.br_blockcount, &i);
1837                         if (error)
1838                                 goto done;
1839                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1840                         error = xfs_btree_delete(bma->cur, &i);
1841                         if (error)
1842                                 goto done;
1843                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1844                         error = xfs_btree_decrement(bma->cur, 0, &i);
1845                         if (error)
1846                                 goto done;
1847                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1848                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1849                                         LEFT.br_startblock,
1850                                         LEFT.br_blockcount +
1851                                         PREV.br_blockcount +
1852                                         RIGHT.br_blockcount, LEFT.br_state);
1853                         if (error)
1854                                 goto done;
1855                 }
1856                 break;
1857
1858         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1859                 /*
1860                  * Filling in all of a previously delayed allocation extent.
1861                  * The left neighbor is contiguous, the right is not.
1862                  */
1863                 bma->idx--;
1864
1865                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1866                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1867                         LEFT.br_blockcount + PREV.br_blockcount);
1868                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1869
1870                 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1871                 if (bma->cur == NULL)
1872                         rval = XFS_ILOG_DEXT;
1873                 else {
1874                         rval = 0;
1875                         error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
1876                                         LEFT.br_startblock, LEFT.br_blockcount,
1877                                         &i);
1878                         if (error)
1879                                 goto done;
1880                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1881                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1882                                         LEFT.br_startblock,
1883                                         LEFT.br_blockcount +
1884                                         PREV.br_blockcount, LEFT.br_state);
1885                         if (error)
1886                                 goto done;
1887                 }
1888                 break;
1889
1890         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1891                 /*
1892                  * Filling in all of a previously delayed allocation extent.
1893                  * The right neighbor is contiguous, the left is not.
1894                  */
1895                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1896                 xfs_bmbt_set_startblock(ep, new->br_startblock);
1897                 xfs_bmbt_set_blockcount(ep,
1898                         PREV.br_blockcount + RIGHT.br_blockcount);
1899                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1900
1901                 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1902                 if (bma->cur == NULL)
1903                         rval = XFS_ILOG_DEXT;
1904                 else {
1905                         rval = 0;
1906                         error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1907                                         RIGHT.br_startblock,
1908                                         RIGHT.br_blockcount, &i);
1909                         if (error)
1910                                 goto done;
1911                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1912                         error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
1913                                         new->br_startblock,
1914                                         PREV.br_blockcount +
1915                                         RIGHT.br_blockcount, PREV.br_state);
1916                         if (error)
1917                                 goto done;
1918                 }
1919                 break;
1920
1921         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1922                 /*
1923                  * Filling in all of a previously delayed allocation extent.
1924                  * Neither the left nor right neighbors are contiguous with
1925                  * the new one.
1926                  */
1927                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1928                 xfs_bmbt_set_startblock(ep, new->br_startblock);
1929                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1930
1931                 bma->ip->i_d.di_nextents++;
1932                 if (bma->cur == NULL)
1933                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1934                 else {
1935                         rval = XFS_ILOG_CORE;
1936                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
1937                                         new->br_startblock, new->br_blockcount,
1938                                         &i);
1939                         if (error)
1940                                 goto done;
1941                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1942                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
1943                         error = xfs_btree_insert(bma->cur, &i);
1944                         if (error)
1945                                 goto done;
1946                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1947                 }
1948                 break;
1949
1950         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1951                 /*
1952                  * Filling in the first part of a previous delayed allocation.
1953                  * The left neighbor is contiguous.
1954                  */
1955                 trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1956                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
1957                         LEFT.br_blockcount + new->br_blockcount);
1958                 xfs_bmbt_set_startoff(ep,
1959                         PREV.br_startoff + new->br_blockcount);
1960                 trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1961
1962                 temp = PREV.br_blockcount - new->br_blockcount;
1963                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1964                 xfs_bmbt_set_blockcount(ep, temp);
1965                 if (bma->cur == NULL)
1966                         rval = XFS_ILOG_DEXT;
1967                 else {
1968                         rval = 0;
1969                         error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
1970                                         LEFT.br_startblock, LEFT.br_blockcount,
1971                                         &i);
1972                         if (error)
1973                                 goto done;
1974                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1975                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1976                                         LEFT.br_startblock,
1977                                         LEFT.br_blockcount +
1978                                         new->br_blockcount,
1979                                         LEFT.br_state);
1980                         if (error)
1981                                 goto done;
1982                 }
1983                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1984                         startblockval(PREV.br_startblock));
1985                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
1986                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1987
1988                 bma->idx--;
1989                 break;
1990
1991         case BMAP_LEFT_FILLING:
1992                 /*
1993                  * Filling in the first part of a previous delayed allocation.
1994                  * The left neighbor is not contiguous.
1995                  */
1996                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1997                 xfs_bmbt_set_startoff(ep, new_endoff);
1998                 temp = PREV.br_blockcount - new->br_blockcount;
1999                 xfs_bmbt_set_blockcount(ep, temp);
2000                 xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
2001                 bma->ip->i_d.di_nextents++;
2002                 if (bma->cur == NULL)
2003                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2004                 else {
2005                         rval = XFS_ILOG_CORE;
2006                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2007                                         new->br_startblock, new->br_blockcount,
2008                                         &i);
2009                         if (error)
2010                                 goto done;
2011                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2012                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2013                         error = xfs_btree_insert(bma->cur, &i);
2014                         if (error)
2015                                 goto done;
2016                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2017                 }
2018
2019                 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
2020                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2021                                         bma->firstblock, bma->flist,
2022                                         &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
2023                         rval |= tmp_rval;
2024                         if (error)
2025                                 goto done;
2026                 }
2027                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2028                         startblockval(PREV.br_startblock) -
2029                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2030                 ep = xfs_iext_get_ext(ifp, bma->idx + 1);
2031                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2032                 trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2033                 break;
2034
2035         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2036                 /*
2037                  * Filling in the last part of a previous delayed allocation.
2038                  * The right neighbor is contiguous with the new allocation.
2039                  */
2040                 temp = PREV.br_blockcount - new->br_blockcount;
2041                 trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2042                 xfs_bmbt_set_blockcount(ep, temp);
2043                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
2044                         new->br_startoff, new->br_startblock,
2045                         new->br_blockcount + RIGHT.br_blockcount,
2046                         RIGHT.br_state);
2047                 trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2048                 if (bma->cur == NULL)
2049                         rval = XFS_ILOG_DEXT;
2050                 else {
2051                         rval = 0;
2052                         error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
2053                                         RIGHT.br_startblock,
2054                                         RIGHT.br_blockcount, &i);
2055                         if (error)
2056                                 goto done;
2057                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2058                         error = xfs_bmbt_update(bma->cur, new->br_startoff,
2059                                         new->br_startblock,
2060                                         new->br_blockcount +
2061                                         RIGHT.br_blockcount,
2062                                         RIGHT.br_state);
2063                         if (error)
2064                                 goto done;
2065                 }
2066
2067                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2068                         startblockval(PREV.br_startblock));
2069                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2070                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2071                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2072
2073                 bma->idx++;
2074                 break;
2075
2076         case BMAP_RIGHT_FILLING:
2077                 /*
2078                  * Filling in the last part of a previous delayed allocation.
2079                  * The right neighbor is not contiguous.
2080                  */
2081                 temp = PREV.br_blockcount - new->br_blockcount;
2082                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2083                 xfs_bmbt_set_blockcount(ep, temp);
2084                 xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
2085                 bma->ip->i_d.di_nextents++;
2086                 if (bma->cur == NULL)
2087                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2088                 else {
2089                         rval = XFS_ILOG_CORE;
2090                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2091                                         new->br_startblock, new->br_blockcount,
2092                                         &i);
2093                         if (error)
2094                                 goto done;
2095                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2096                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2097                         error = xfs_btree_insert(bma->cur, &i);
2098                         if (error)
2099                                 goto done;
2100                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2101                 }
2102
2103                 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
2104                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2105                                 bma->firstblock, bma->flist, &bma->cur, 1,
2106                                 &tmp_rval, XFS_DATA_FORK);
2107                         rval |= tmp_rval;
2108                         if (error)
2109                                 goto done;
2110                 }
2111                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2112                         startblockval(PREV.br_startblock) -
2113                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2114                 ep = xfs_iext_get_ext(ifp, bma->idx);
2115                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2116                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2117
2118                 bma->idx++;
2119                 break;
2120
2121         case 0:
2122                 /*
2123                  * Filling in the middle part of a previous delayed allocation.
2124                  * Contiguity is impossible here.
2125                  * This case is avoided almost all the time.
2126                  *
2127                  * We start with a delayed allocation:
2128                  *
2129                  * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
2130                  *  PREV @ idx
2131                  *
2132                  * and we are allocating:
2133                  *                     +rrrrrrrrrrrrrrrrr+
2134                  *                            new
2135                  *
2136                  * and we set it up for insertion as:
2137                  * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
2138                  *                            new
2139                  *  PREV @ idx          LEFT              RIGHT
2140                  *                      inserted at idx + 1
2141                  */
2142                 temp = new->br_startoff - PREV.br_startoff;
2143                 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
2144                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
2145                 xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
2146                 LEFT = *new;
2147                 RIGHT.br_state = PREV.br_state;
2148                 RIGHT.br_startblock = nullstartblock(
2149                                 (int)xfs_bmap_worst_indlen(bma->ip, temp2));
2150                 RIGHT.br_startoff = new_endoff;
2151                 RIGHT.br_blockcount = temp2;
2152                 /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
2153                 xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
2154                 bma->ip->i_d.di_nextents++;
2155                 if (bma->cur == NULL)
2156                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2157                 else {
2158                         rval = XFS_ILOG_CORE;
2159                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2160                                         new->br_startblock, new->br_blockcount,
2161                                         &i);
2162                         if (error)
2163                                 goto done;
2164                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2165                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2166                         error = xfs_btree_insert(bma->cur, &i);
2167                         if (error)
2168                                 goto done;
2169                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2170                 }
2171
2172                 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
2173                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2174                                         bma->firstblock, bma->flist, &bma->cur,
2175                                         1, &tmp_rval, XFS_DATA_FORK);
2176                         rval |= tmp_rval;
2177                         if (error)
2178                                 goto done;
2179                 }
2180                 temp = xfs_bmap_worst_indlen(bma->ip, temp);
2181                 temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
2182                 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
2183                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2184                 if (diff > 0) {
2185                         error = xfs_mod_fdblocks(bma->ip->i_mount,
2186                                                  -((int64_t)diff), false);
2187                         ASSERT(!error);
2188                         if (error)
2189                                 goto done;
2190                 }
2191
2192                 ep = xfs_iext_get_ext(ifp, bma->idx);
2193                 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
2194                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2195                 trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2196                 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
2197                         nullstartblock((int)temp2));
2198                 trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2199
2200                 bma->idx++;
2201                 da_new = temp + temp2;
2202                 break;
2203
2204         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2205         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2206         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2207         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2208         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2209         case BMAP_LEFT_CONTIG:
2210         case BMAP_RIGHT_CONTIG:
2211                 /*
2212                  * These cases are all impossible.
2213                  */
2214                 ASSERT(0);
2215         }
2216
2217         /* convert to a btree if necessary */
2218         if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
2219                 int     tmp_logflags;   /* partial log flag return val */
2220
2221                 ASSERT(bma->cur == NULL);
2222                 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2223                                 bma->firstblock, bma->flist, &bma->cur,
2224                                 da_old > 0, &tmp_logflags, XFS_DATA_FORK);
2225                 bma->logflags |= tmp_logflags;
2226                 if (error)
2227                         goto done;
2228         }
2229
2230         /* adjust for changes in reserved delayed indirect blocks */
2231         if (da_old || da_new) {
2232                 temp = da_new;
2233                 if (bma->cur)
2234                         temp += bma->cur->bc_private.b.allocated;
2235                 ASSERT(temp <= da_old);
2236                 if (temp < da_old)
2237                         xfs_mod_fdblocks(bma->ip->i_mount,
2238                                         (int64_t)(da_old - temp), false);
2239         }
2240
2241         /* clear out the allocated field, done with it now in any case. */
2242         if (bma->cur)
2243                 bma->cur->bc_private.b.allocated = 0;
2244
2245         xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
2246 done:
2247         bma->logflags |= rval;
2248         return error;
2249 #undef  LEFT
2250 #undef  RIGHT
2251 #undef  PREV
2252 }
2253
2254 /*
2255  * Convert an unwritten allocation to a real allocation or vice versa.
2256  */
2257 STATIC int                              /* error */
2258 xfs_bmap_add_extent_unwritten_real(
2259         struct xfs_trans        *tp,
2260         xfs_inode_t             *ip,    /* incore inode pointer */
2261         xfs_extnum_t            *idx,   /* extent number to update/insert */
2262         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
2263         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
2264         xfs_fsblock_t           *first, /* pointer to firstblock variable */
2265         xfs_bmap_free_t         *flist, /* list of extents to be freed */
2266         int                     *logflagsp) /* inode logging flags */
2267 {
2268         xfs_btree_cur_t         *cur;   /* btree cursor */
2269         xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
2270         int                     error;  /* error return value */
2271         int                     i;      /* temp state */
2272         xfs_ifork_t             *ifp;   /* inode fork pointer */
2273         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
2274         xfs_exntst_t            newext; /* new extent state */
2275         xfs_exntst_t            oldext; /* old extent state */
2276         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
2277                                         /* left is 0, right is 1, prev is 2 */
2278         int                     rval=0; /* return value (logging flags) */
2279         int                     state = 0;/* state bits, accessed thru macros */
2280         struct xfs_mount        *mp = tp->t_mountp;
2281
2282         *logflagsp = 0;
2283
2284         cur = *curp;
2285         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
2286
2287         ASSERT(*idx >= 0);
2288         ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
2289         ASSERT(!isnullstartblock(new->br_startblock));
2290
2291         XFS_STATS_INC(mp, xs_add_exlist);
2292
2293 #define LEFT            r[0]
2294 #define RIGHT           r[1]
2295 #define PREV            r[2]
2296
2297         /*
2298          * Set up a bunch of variables to make the tests simpler.
2299          */
2300         error = 0;
2301         ep = xfs_iext_get_ext(ifp, *idx);
2302         xfs_bmbt_get_all(ep, &PREV);
2303         newext = new->br_state;
2304         oldext = (newext == XFS_EXT_UNWRITTEN) ?
2305                 XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
2306         ASSERT(PREV.br_state == oldext);
2307         new_endoff = new->br_startoff + new->br_blockcount;
2308         ASSERT(PREV.br_startoff <= new->br_startoff);
2309         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2310
2311         /*
2312          * Set flags determining what part of the previous oldext allocation
2313          * extent is being replaced by a newext allocation.
2314          */
2315         if (PREV.br_startoff == new->br_startoff)
2316                 state |= BMAP_LEFT_FILLING;
2317         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2318                 state |= BMAP_RIGHT_FILLING;
2319
2320         /*
2321          * Check and set flags if this segment has a left neighbor.
2322          * Don't set contiguous if the combined extent would be too large.
2323          */
2324         if (*idx > 0) {
2325                 state |= BMAP_LEFT_VALID;
2326                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
2327
2328                 if (isnullstartblock(LEFT.br_startblock))
2329                         state |= BMAP_LEFT_DELAY;
2330         }
2331
2332         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2333             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2334             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2335             LEFT.br_state == newext &&
2336             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2337                 state |= BMAP_LEFT_CONTIG;
2338
2339         /*
2340          * Check and set flags if this segment has a right neighbor.
2341          * Don't set contiguous if the combined extent would be too large.
2342          * Also check for all-three-contiguous being too large.
2343          */
2344         if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
2345                 state |= BMAP_RIGHT_VALID;
2346                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
2347                 if (isnullstartblock(RIGHT.br_startblock))
2348                         state |= BMAP_RIGHT_DELAY;
2349         }
2350
2351         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2352             new_endoff == RIGHT.br_startoff &&
2353             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2354             newext == RIGHT.br_state &&
2355             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2356             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2357                        BMAP_RIGHT_FILLING)) !=
2358                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2359                        BMAP_RIGHT_FILLING) ||
2360              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2361                         <= MAXEXTLEN))
2362                 state |= BMAP_RIGHT_CONTIG;
2363
2364         /*
2365          * Switch out based on the FILLING and CONTIG state bits.
2366          */
2367         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2368                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2369         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2370              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2371                 /*
2372                  * Setting all of a previous oldext extent to newext.
2373                  * The left and right neighbors are both contiguous with new.
2374                  */
2375                 --*idx;
2376
2377                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2378                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2379                         LEFT.br_blockcount + PREV.br_blockcount +
2380                         RIGHT.br_blockcount);
2381                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2382
2383                 xfs_iext_remove(ip, *idx + 1, 2, state);
2384                 ip->i_d.di_nextents -= 2;
2385                 if (cur == NULL)
2386                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2387                 else {
2388                         rval = XFS_ILOG_CORE;
2389                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2390                                         RIGHT.br_startblock,
2391                                         RIGHT.br_blockcount, &i)))
2392                                 goto done;
2393                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2394                         if ((error = xfs_btree_delete(cur, &i)))
2395                                 goto done;
2396                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2397                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2398                                 goto done;
2399                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2400                         if ((error = xfs_btree_delete(cur, &i)))
2401                                 goto done;
2402                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2403                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2404                                 goto done;
2405                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2406                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2407                                 LEFT.br_startblock,
2408                                 LEFT.br_blockcount + PREV.br_blockcount +
2409                                 RIGHT.br_blockcount, LEFT.br_state)))
2410                                 goto done;
2411                 }
2412                 break;
2413
2414         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2415                 /*
2416                  * Setting all of a previous oldext extent to newext.
2417                  * The left neighbor is contiguous, the right is not.
2418                  */
2419                 --*idx;
2420
2421                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2422                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2423                         LEFT.br_blockcount + PREV.br_blockcount);
2424                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2425
2426                 xfs_iext_remove(ip, *idx + 1, 1, state);
2427                 ip->i_d.di_nextents--;
2428                 if (cur == NULL)
2429                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2430                 else {
2431                         rval = XFS_ILOG_CORE;
2432                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2433                                         PREV.br_startblock, PREV.br_blockcount,
2434                                         &i)))
2435                                 goto done;
2436                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2437                         if ((error = xfs_btree_delete(cur, &i)))
2438                                 goto done;
2439                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2440                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2441                                 goto done;
2442                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2443                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2444                                 LEFT.br_startblock,
2445                                 LEFT.br_blockcount + PREV.br_blockcount,
2446                                 LEFT.br_state)))
2447                                 goto done;
2448                 }
2449                 break;
2450
2451         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2452                 /*
2453                  * Setting all of a previous oldext extent to newext.
2454                  * The right neighbor is contiguous, the left is not.
2455                  */
2456                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2457                 xfs_bmbt_set_blockcount(ep,
2458                         PREV.br_blockcount + RIGHT.br_blockcount);
2459                 xfs_bmbt_set_state(ep, newext);
2460                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2461                 xfs_iext_remove(ip, *idx + 1, 1, state);
2462                 ip->i_d.di_nextents--;
2463                 if (cur == NULL)
2464                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2465                 else {
2466                         rval = XFS_ILOG_CORE;
2467                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2468                                         RIGHT.br_startblock,
2469                                         RIGHT.br_blockcount, &i)))
2470                                 goto done;
2471                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2472                         if ((error = xfs_btree_delete(cur, &i)))
2473                                 goto done;
2474                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2475                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2476                                 goto done;
2477                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2478                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
2479                                 new->br_startblock,
2480                                 new->br_blockcount + RIGHT.br_blockcount,
2481                                 newext)))
2482                                 goto done;
2483                 }
2484                 break;
2485
2486         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2487                 /*
2488                  * Setting all of a previous oldext extent to newext.
2489                  * Neither the left nor right neighbors are contiguous with
2490                  * the new one.
2491                  */
2492                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2493                 xfs_bmbt_set_state(ep, newext);
2494                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2495
2496                 if (cur == NULL)
2497                         rval = XFS_ILOG_DEXT;
2498                 else {
2499                         rval = 0;
2500                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2501                                         new->br_startblock, new->br_blockcount,
2502                                         &i)))
2503                                 goto done;
2504                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2505                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
2506                                 new->br_startblock, new->br_blockcount,
2507                                 newext)))
2508                                 goto done;
2509                 }
2510                 break;
2511
2512         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2513                 /*
2514                  * Setting the first part of a previous oldext extent to newext.
2515                  * The left neighbor is contiguous.
2516                  */
2517                 trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
2518                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
2519                         LEFT.br_blockcount + new->br_blockcount);
2520                 xfs_bmbt_set_startoff(ep,
2521                         PREV.br_startoff + new->br_blockcount);
2522                 trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
2523
2524                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2525                 xfs_bmbt_set_startblock(ep,
2526                         new->br_startblock + new->br_blockcount);
2527                 xfs_bmbt_set_blockcount(ep,
2528                         PREV.br_blockcount - new->br_blockcount);
2529                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2530
2531                 --*idx;
2532
2533                 if (cur == NULL)
2534                         rval = XFS_ILOG_DEXT;
2535                 else {
2536                         rval = 0;
2537                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2538                                         PREV.br_startblock, PREV.br_blockcount,
2539                                         &i)))
2540                                 goto done;
2541                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2542                         if ((error = xfs_bmbt_update(cur,
2543                                 PREV.br_startoff + new->br_blockcount,
2544                                 PREV.br_startblock + new->br_blockcount,
2545                                 PREV.br_blockcount - new->br_blockcount,
2546                                 oldext)))
2547                                 goto done;
2548                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2549                                 goto done;
2550                         error = xfs_bmbt_update(cur, LEFT.br_startoff,
2551                                 LEFT.br_startblock,
2552                                 LEFT.br_blockcount + new->br_blockcount,
2553                                 LEFT.br_state);
2554                         if (error)
2555                                 goto done;
2556                 }
2557                 break;
2558
2559         case BMAP_LEFT_FILLING:
2560                 /*
2561                  * Setting the first part of a previous oldext extent to newext.
2562                  * The left neighbor is not contiguous.
2563                  */
2564                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2565                 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
2566                 xfs_bmbt_set_startoff(ep, new_endoff);
2567                 xfs_bmbt_set_blockcount(ep,
2568                         PREV.br_blockcount - new->br_blockcount);
2569                 xfs_bmbt_set_startblock(ep,
2570                         new->br_startblock + new->br_blockcount);
2571                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2572
2573                 xfs_iext_insert(ip, *idx, 1, new, state);
2574                 ip->i_d.di_nextents++;
2575                 if (cur == NULL)
2576                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2577                 else {
2578                         rval = XFS_ILOG_CORE;
2579                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2580                                         PREV.br_startblock, PREV.br_blockcount,
2581                                         &i)))
2582                                 goto done;
2583                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2584                         if ((error = xfs_bmbt_update(cur,
2585                                 PREV.br_startoff + new->br_blockcount,
2586                                 PREV.br_startblock + new->br_blockcount,
2587                                 PREV.br_blockcount - new->br_blockcount,
2588                                 oldext)))
2589                                 goto done;
2590                         cur->bc_rec.b = *new;
2591                         if ((error = xfs_btree_insert(cur, &i)))
2592                                 goto done;
2593                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2594                 }
2595                 break;
2596
2597         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2598                 /*
2599                  * Setting the last part of a previous oldext extent to newext.
2600                  * The right neighbor is contiguous with the new allocation.
2601                  */
2602                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2603                 xfs_bmbt_set_blockcount(ep,
2604                         PREV.br_blockcount - new->br_blockcount);
2605                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2606
2607                 ++*idx;
2608
2609                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2610                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
2611                         new->br_startoff, new->br_startblock,
2612                         new->br_blockcount + RIGHT.br_blockcount, newext);
2613                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2614
2615                 if (cur == NULL)
2616                         rval = XFS_ILOG_DEXT;
2617                 else {
2618                         rval = 0;
2619                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2620                                         PREV.br_startblock,
2621                                         PREV.br_blockcount, &i)))
2622                                 goto done;
2623                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2624                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2625                                 PREV.br_startblock,
2626                                 PREV.br_blockcount - new->br_blockcount,
2627                                 oldext)))
2628                                 goto done;
2629                         if ((error = xfs_btree_increment(cur, 0, &i)))
2630                                 goto done;
2631                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
2632                                 new->br_startblock,
2633                                 new->br_blockcount + RIGHT.br_blockcount,
2634                                 newext)))
2635                                 goto done;
2636                 }
2637                 break;
2638
2639         case BMAP_RIGHT_FILLING:
2640                 /*
2641                  * Setting the last part of a previous oldext extent to newext.
2642                  * The right neighbor is not contiguous.
2643                  */
2644                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2645                 xfs_bmbt_set_blockcount(ep,
2646                         PREV.br_blockcount - new->br_blockcount);
2647                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2648
2649                 ++*idx;
2650                 xfs_iext_insert(ip, *idx, 1, new, state);
2651
2652                 ip->i_d.di_nextents++;
2653                 if (cur == NULL)
2654                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2655                 else {
2656                         rval = XFS_ILOG_CORE;
2657                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2658                                         PREV.br_startblock, PREV.br_blockcount,
2659                                         &i)))
2660                                 goto done;
2661                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2662                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2663                                 PREV.br_startblock,
2664                                 PREV.br_blockcount - new->br_blockcount,
2665                                 oldext)))
2666                                 goto done;
2667                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2668                                         new->br_startblock, new->br_blockcount,
2669                                         &i)))
2670                                 goto done;
2671                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2672                         cur->bc_rec.b.br_state = XFS_EXT_NORM;
2673                         if ((error = xfs_btree_insert(cur, &i)))
2674                                 goto done;
2675                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2676                 }
2677                 break;
2678
2679         case 0:
2680                 /*
2681                  * Setting the middle part of a previous oldext extent to
2682                  * newext.  Contiguity is impossible here.
2683                  * One extent becomes three extents.
2684                  */
2685                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2686                 xfs_bmbt_set_blockcount(ep,
2687                         new->br_startoff - PREV.br_startoff);
2688                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2689
2690                 r[0] = *new;
2691                 r[1].br_startoff = new_endoff;
2692                 r[1].br_blockcount =
2693                         PREV.br_startoff + PREV.br_blockcount - new_endoff;
2694                 r[1].br_startblock = new->br_startblock + new->br_blockcount;
2695                 r[1].br_state = oldext;
2696
2697                 ++*idx;
2698                 xfs_iext_insert(ip, *idx, 2, &r[0], state);
2699
2700                 ip->i_d.di_nextents += 2;
2701                 if (cur == NULL)
2702                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2703                 else {
2704                         rval = XFS_ILOG_CORE;
2705                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2706                                         PREV.br_startblock, PREV.br_blockcount,
2707                                         &i)))
2708                                 goto done;
2709                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2710                         /* new right extent - oldext */
2711                         if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
2712                                 r[1].br_startblock, r[1].br_blockcount,
2713                                 r[1].br_state)))
2714                                 goto done;
2715                         /* new left extent - oldext */
2716                         cur->bc_rec.b = PREV;
2717                         cur->bc_rec.b.br_blockcount =
2718                                 new->br_startoff - PREV.br_startoff;
2719                         if ((error = xfs_btree_insert(cur, &i)))
2720                                 goto done;
2721                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2722                         /*
2723                          * Reset the cursor to the position of the new extent
2724                          * we are about to insert as we can't trust it after
2725                          * the previous insert.
2726                          */
2727                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2728                                         new->br_startblock, new->br_blockcount,
2729                                         &i)))
2730                                 goto done;
2731                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2732                         /* new middle extent - newext */
2733                         cur->bc_rec.b.br_state = new->br_state;
2734                         if ((error = xfs_btree_insert(cur, &i)))
2735                                 goto done;
2736                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2737                 }
2738                 break;
2739
2740         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2741         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2742         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2743         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2744         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2745         case BMAP_LEFT_CONTIG:
2746         case BMAP_RIGHT_CONTIG:
2747                 /*
2748                  * These cases are all impossible.
2749                  */
2750                 ASSERT(0);
2751         }
2752
2753         /* convert to a btree if necessary */
2754         if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
2755                 int     tmp_logflags;   /* partial log flag return val */
2756
2757                 ASSERT(cur == NULL);
2758                 error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
2759                                 0, &tmp_logflags, XFS_DATA_FORK);
2760                 *logflagsp |= tmp_logflags;
2761                 if (error)
2762                         goto done;
2763         }
2764
2765         /* clear out the allocated field, done with it now in any case. */
2766         if (cur) {
2767                 cur->bc_private.b.allocated = 0;
2768                 *curp = cur;
2769         }
2770
2771         xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
2772 done:
2773         *logflagsp |= rval;
2774         return error;
2775 #undef  LEFT
2776 #undef  RIGHT
2777 #undef  PREV
2778 }
2779
2780 /*
2781  * Convert a hole to a delayed allocation.
2782  */
2783 STATIC void
2784 xfs_bmap_add_extent_hole_delay(
2785         xfs_inode_t             *ip,    /* incore inode pointer */
2786         xfs_extnum_t            *idx,   /* extent number to update/insert */
2787         xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
2788 {
2789         xfs_ifork_t             *ifp;   /* inode fork pointer */
2790         xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2791         xfs_filblks_t           newlen=0;       /* new indirect size */
2792         xfs_filblks_t           oldlen=0;       /* old indirect size */
2793         xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2794         int                     state;  /* state bits, accessed thru macros */
2795         xfs_filblks_t           temp=0; /* temp for indirect calculations */
2796
2797         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
2798         state = 0;
2799         ASSERT(isnullstartblock(new->br_startblock));
2800
2801         /*
2802          * Check and set flags if this segment has a left neighbor
2803          */
2804         if (*idx > 0) {
2805                 state |= BMAP_LEFT_VALID;
2806                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
2807
2808                 if (isnullstartblock(left.br_startblock))
2809                         state |= BMAP_LEFT_DELAY;
2810         }
2811
2812         /*
2813          * Check and set flags if the current (right) segment exists.
2814          * If it doesn't exist, we're converting the hole at end-of-file.
2815          */
2816         if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
2817                 state |= BMAP_RIGHT_VALID;
2818                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
2819
2820                 if (isnullstartblock(right.br_startblock))
2821                         state |= BMAP_RIGHT_DELAY;
2822         }
2823
2824         /*
2825          * Set contiguity flags on the left and right neighbors.
2826          * Don't let extents get too large, even if the pieces are contiguous.
2827          */
2828         if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2829             left.br_startoff + left.br_blockcount == new->br_startoff &&
2830             left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2831                 state |= BMAP_LEFT_CONTIG;
2832
2833         if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2834             new->br_startoff + new->br_blockcount == right.br_startoff &&
2835             new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2836             (!(state & BMAP_LEFT_CONTIG) ||
2837              (left.br_blockcount + new->br_blockcount +
2838               right.br_blockcount <= MAXEXTLEN)))
2839                 state |= BMAP_RIGHT_CONTIG;
2840
2841         /*
2842          * Switch out based on the contiguity flags.
2843          */
2844         switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2845         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2846                 /*
2847                  * New allocation is contiguous with delayed allocations
2848                  * on the left and on the right.
2849                  * Merge all three into a single extent record.
2850                  */
2851                 --*idx;
2852                 temp = left.br_blockcount + new->br_blockcount +
2853                         right.br_blockcount;
2854
2855                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2856                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
2857                 oldlen = startblockval(left.br_startblock) +
2858                         startblockval(new->br_startblock) +
2859                         startblockval(right.br_startblock);
2860                 newlen = xfs_bmap_worst_indlen(ip, temp);
2861                 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
2862                         nullstartblock((int)newlen));
2863                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2864
2865                 xfs_iext_remove(ip, *idx + 1, 1, state);
2866                 break;
2867
2868         case BMAP_LEFT_CONTIG:
2869                 /*
2870                  * New allocation is contiguous with a delayed allocation
2871                  * on the left.
2872                  * Merge the new allocation with the left neighbor.
2873                  */
2874                 --*idx;
2875                 temp = left.br_blockcount + new->br_blockcount;
2876
2877                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2878                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
2879                 oldlen = startblockval(left.br_startblock) +
2880                         startblockval(new->br_startblock);
2881                 newlen = xfs_bmap_worst_indlen(ip, temp);
2882                 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
2883                         nullstartblock((int)newlen));
2884                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2885                 break;
2886
2887         case BMAP_RIGHT_CONTIG:
2888                 /*
2889                  * New allocation is contiguous with a delayed allocation
2890                  * on the right.
2891                  * Merge the new allocation with the right neighbor.
2892                  */
2893                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2894                 temp = new->br_blockcount + right.br_blockcount;
2895                 oldlen = startblockval(new->br_startblock) +
2896                         startblockval(right.br_startblock);
2897                 newlen = xfs_bmap_worst_indlen(ip, temp);
2898                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
2899                         new->br_startoff,
2900                         nullstartblock((int)newlen), temp, right.br_state);
2901                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2902                 break;
2903
2904         case 0:
2905                 /*
2906                  * New allocation is not contiguous with another
2907                  * delayed allocation.
2908                  * Insert a new entry.
2909                  */
2910                 oldlen = newlen = 0;
2911                 xfs_iext_insert(ip, *idx, 1, new, state);
2912                 break;
2913         }
2914         if (oldlen != newlen) {
2915                 ASSERT(oldlen > newlen);
2916                 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2917                                  false);
2918                 /*
2919                  * Nothing to do for disk quota accounting here.
2920                  */
2921         }
2922 }
2923
2924 /*
2925  * Convert a hole to a real allocation.
2926  */
2927 STATIC int                              /* error */
2928 xfs_bmap_add_extent_hole_real(
2929         struct xfs_bmalloca     *bma,
2930         int                     whichfork)
2931 {
2932         struct xfs_bmbt_irec    *new = &bma->got;
2933         int                     error;  /* error return value */
2934         int                     i;      /* temp state */
2935         xfs_ifork_t             *ifp;   /* inode fork pointer */
2936         xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2937         xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2938         int                     rval=0; /* return value (logging flags) */
2939         int                     state;  /* state bits, accessed thru macros */
2940         struct xfs_mount        *mp;
2941
2942         mp = bma->tp ? bma->tp->t_mountp : NULL;
2943         ifp = XFS_IFORK_PTR(bma->ip, whichfork);
2944
2945         ASSERT(bma->idx >= 0);
2946         ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
2947         ASSERT(!isnullstartblock(new->br_startblock));
2948         ASSERT(!bma->cur ||
2949                !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2950
2951         XFS_STATS_INC(mp, xs_add_exlist);
2952
2953         state = 0;
2954         if (whichfork == XFS_ATTR_FORK)
2955                 state |= BMAP_ATTRFORK;
2956
2957         /*
2958          * Check and set flags if this segment has a left neighbor.
2959          */
2960         if (bma->idx > 0) {
2961                 state |= BMAP_LEFT_VALID;
2962                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
2963                 if (isnullstartblock(left.br_startblock))
2964                         state |= BMAP_LEFT_DELAY;
2965         }
2966
2967         /*
2968          * Check and set flags if this segment has a current value.
2969          * Not true if we're inserting into the "hole" at eof.
2970          */
2971         if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
2972                 state |= BMAP_RIGHT_VALID;
2973                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
2974                 if (isnullstartblock(right.br_startblock))
2975                         state |= BMAP_RIGHT_DELAY;
2976         }
2977
2978         /*
2979          * We're inserting a real allocation between "left" and "right".
2980          * Set the contiguity flags.  Don't let extents get too large.
2981          */
2982         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2983             left.br_startoff + left.br_blockcount == new->br_startoff &&
2984             left.br_startblock + left.br_blockcount == new->br_startblock &&
2985             left.br_state == new->br_state &&
2986             left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2987                 state |= BMAP_LEFT_CONTIG;
2988
2989         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2990             new->br_startoff + new->br_blockcount == right.br_startoff &&
2991             new->br_startblock + new->br_blockcount == right.br_startblock &&
2992             new->br_state == right.br_state &&
2993             new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2994             (!(state & BMAP_LEFT_CONTIG) ||
2995              left.br_blockcount + new->br_blockcount +
2996              right.br_blockcount <= MAXEXTLEN))
2997                 state |= BMAP_RIGHT_CONTIG;
2998
2999         error = 0;
3000         /*
3001          * Select which case we're in here, and implement it.
3002          */
3003         switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
3004         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
3005                 /*
3006                  * New allocation is contiguous with real allocations on the
3007                  * left and on the right.
3008                  * Merge all three into a single extent record.
3009                  */
3010                 --bma->idx;
3011                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
3012                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
3013                         left.br_blockcount + new->br_blockcount +
3014                         right.br_blockcount);
3015                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
3016
3017                 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
3018
3019                 XFS_IFORK_NEXT_SET(bma->ip, whichfork,
3020                         XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
3021                 if (bma->cur == NULL) {
3022                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
3023                 } else {
3024                         rval = XFS_ILOG_CORE;
3025                         error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
3026                                         right.br_startblock, right.br_blockcount,
3027                                         &i);
3028                         if (error)
3029                                 goto done;
3030                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3031                         error = xfs_btree_delete(bma->cur, &i);
3032                         if (error)
3033                                 goto done;
3034                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3035                         error = xfs_btree_decrement(bma->cur, 0, &i);
3036                         if (error)
3037                                 goto done;
3038                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3039                         error = xfs_bmbt_update(bma->cur, left.br_startoff,
3040                                         left.br_startblock,
3041                                         left.br_blockcount +
3042                                                 new->br_blockcount +
3043                                                 right.br_blockcount,
3044                                         left.br_state);
3045                         if (error)
3046                                 goto done;
3047                 }
3048                 break;
3049
3050         case BMAP_LEFT_CONTIG:
3051                 /*
3052                  * New allocation is contiguous with a real allocation
3053                  * on the left.
3054                  * Merge the new allocation with the left neighbor.
3055                  */
3056                 --bma->idx;
3057                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
3058                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
3059                         left.br_blockcount + new->br_blockcount);
3060                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
3061
3062                 if (bma->cur == NULL) {
3063                         rval = xfs_ilog_fext(whichfork);
3064                 } else {
3065                         rval = 0;
3066                         error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
3067                                         left.br_startblock, left.br_blockcount,
3068                                         &i);
3069                         if (error)
3070                                 goto done;
3071                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3072                         error = xfs_bmbt_update(bma->cur, left.br_startoff,
3073                                         left.br_startblock,
3074                                         left.br_blockcount +
3075                                                 new->br_blockcount,
3076                                         left.br_state);
3077                         if (error)
3078                                 goto done;
3079                 }
3080                 break;
3081
3082         case BMAP_RIGHT_CONTIG:
3083                 /*
3084                  * New allocation is contiguous with a real allocation
3085                  * on the right.
3086                  * Merge the new allocation with the right neighbor.
3087                  */
3088                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
3089                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
3090                         new->br_startoff, new->br_startblock,
3091                         new->br_blockcount + right.br_blockcount,
3092                         right.br_state);
3093                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
3094
3095                 if (bma->cur == NULL) {
3096                         rval = xfs_ilog_fext(whichfork);
3097                 } else {
3098                         rval = 0;
3099                         error = xfs_bmbt_lookup_eq(bma->cur,
3100                                         right.br_startoff,
3101                                         right.br_startblock,
3102                                         right.br_blockcount, &i);
3103                         if (error)
3104                                 goto done;
3105                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3106                         error = xfs_bmbt_update(bma->cur, new->br_startoff,
3107                                         new->br_startblock,
3108                                         new->br_blockcount +
3109                                                 right.br_blockcount,
3110                                         right.br_state);
3111                         if (error)
3112                                 goto done;
3113                 }
3114                 break;
3115
3116         case 0:
3117                 /*
3118                  * New allocation is not contiguous with another
3119                  * real allocation.
3120                  * Insert a new entry.
3121                  */
3122                 xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
3123                 XFS_IFORK_NEXT_SET(bma->ip, whichfork,
3124                         XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
3125                 if (bma->cur == NULL) {
3126                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
3127                 } else {
3128                         rval = XFS_ILOG_CORE;
3129                         error = xfs_bmbt_lookup_eq(bma->cur,
3130                                         new->br_startoff,
3131                                         new->br_startblock,
3132                                         new->br_blockcount, &i);
3133                         if (error)
3134                                 goto done;
3135                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
3136                         bma->cur->bc_rec.b.br_state = new->br_state;
3137                         error = xfs_btree_insert(bma->cur, &i);
3138                         if (error)
3139                                 goto done;
3140                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3141                 }
3142                 break;
3143         }
3144
3145         /* convert to a btree if necessary */
3146         if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
3147                 int     tmp_logflags;   /* partial log flag return val */
3148
3149                 ASSERT(bma->cur == NULL);
3150                 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
3151                                 bma->firstblock, bma->flist, &bma->cur,
3152                                 0, &tmp_logflags, whichfork);
3153                 bma->logflags |= tmp_logflags;
3154                 if (error)
3155                         goto done;
3156         }
3157
3158         /* clear out the allocated field, done with it now in any case. */
3159         if (bma->cur)
3160                 bma->cur->bc_private.b.allocated = 0;
3161
3162         xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
3163 done:
3164         bma->logflags |= rval;
3165         return error;
3166 }
3167
3168 /*
3169  * Functions used in the extent read, allocate and remove paths
3170  */
3171
3172 /*
3173  * Adjust the size of the new extent based on di_extsize and rt extsize.
3174  */
3175 int
3176 xfs_bmap_extsize_align(
3177         xfs_mount_t     *mp,
3178         xfs_bmbt_irec_t *gotp,          /* next extent pointer */
3179         xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
3180         xfs_extlen_t    extsz,          /* align to this extent size */
3181         int             rt,             /* is this a realtime inode? */
3182         int             eof,            /* is extent at end-of-file? */
3183         int             delay,          /* creating delalloc extent? */
3184         int             convert,        /* overwriting unwritten extent? */
3185         xfs_fileoff_t   *offp,          /* in/out: aligned offset */
3186         xfs_extlen_t    *lenp)          /* in/out: aligned length */
3187 {
3188         xfs_fileoff_t   orig_off;       /* original offset */
3189         xfs_extlen_t    orig_alen;      /* original length */
3190         xfs_fileoff_t   orig_end;       /* original off+len */
3191         xfs_fileoff_t   nexto;          /* next file offset */
3192         xfs_fileoff_t   prevo;          /* previous file offset */
3193         xfs_fileoff_t   align_off;      /* temp for offset */
3194         xfs_extlen_t    align_alen;     /* temp for length */
3195         xfs_extlen_t    temp;           /* temp for calculations */
3196
3197         if (convert)
3198                 return 0;
3199
3200         orig_off = align_off = *offp;
3201         orig_alen = align_alen = *lenp;
3202         orig_end = orig_off + orig_alen;
3203
3204         /*
3205          * If this request overlaps an existing extent, then don't
3206          * attempt to perform any additional alignment.
3207          */
3208         if (!delay && !eof &&
3209             (orig_off >= gotp->br_startoff) &&
3210             (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
3211                 return 0;
3212         }
3213
3214         /*
3215          * If the file offset is unaligned vs. the extent size
3216          * we need to align it.  This will be possible unless
3217          * the file was previously written with a kernel that didn't
3218          * perform this alignment, or if a truncate shot us in the
3219          * foot.
3220          */
3221         temp = do_mod(orig_off, extsz);
3222         if (temp) {
3223                 align_alen += temp;
3224                 align_off -= temp;
3225         }
3226
3227         /* Same adjustment for the end of the requested area. */
3228         temp = (align_alen % extsz);
3229         if (temp)
3230                 align_alen += extsz - temp;
3231
3232         /*
3233          * For large extent hint sizes, the aligned extent might be larger than
3234          * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
3235          * the length back under MAXEXTLEN. The outer allocation loops handle
3236          * short allocation just fine, so it is safe to do this. We only want to
3237          * do it when we are forced to, though, because it means more allocation
3238          * operations are required.
3239          */
3240         while (align_alen > MAXEXTLEN)
3241                 align_alen -= extsz;
3242         ASSERT(align_alen <= MAXEXTLEN);
3243
3244         /*
3245          * If the previous block overlaps with this proposed allocation
3246          * then move the start forward without adjusting the length.
3247          */
3248         if (prevp->br_startoff != NULLFILEOFF) {
3249                 if (prevp->br_startblock == HOLESTARTBLOCK)
3250                         prevo = prevp->br_startoff;
3251                 else
3252                         prevo = prevp->br_startoff + prevp->br_blockcount;
3253         } else
3254                 prevo = 0;
3255         if (align_off != orig_off && align_off < prevo)
3256                 align_off = prevo;
3257         /*
3258          * If the next block overlaps with this proposed allocation
3259          * then move the start back without adjusting the length,
3260          * but not before offset 0.
3261          * This may of course make the start overlap previous block,
3262          * and if we hit the offset 0 limit then the next block
3263          * can still overlap too.
3264          */
3265         if (!eof && gotp->br_startoff != NULLFILEOFF) {
3266                 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3267                     (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3268                         nexto = gotp->br_startoff + gotp->br_blockcount;
3269                 else
3270                         nexto = gotp->br_startoff;
3271         } else
3272                 nexto = NULLFILEOFF;
3273         if (!eof &&
3274             align_off + align_alen != orig_end &&
3275             align_off + align_alen > nexto)
3276                 align_off = nexto > align_alen ? nexto - align_alen : 0;
3277         /*
3278          * If we're now overlapping the next or previous extent that
3279          * means we can't fit an extsz piece in this hole.  Just move
3280          * the start forward to the first valid spot and set
3281          * the length so we hit the end.
3282          */
3283         if (align_off != orig_off && align_off < prevo)
3284                 align_off = prevo;
3285         if (align_off + align_alen != orig_end &&
3286             align_off + align_alen > nexto &&
3287             nexto != NULLFILEOFF) {
3288                 ASSERT(nexto > prevo);
3289                 align_alen = nexto - align_off;
3290         }
3291
3292         /*
3293          * If realtime, and the result isn't a multiple of the realtime
3294          * extent size we need to remove blocks until it is.
3295          */
3296         if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3297                 /*
3298                  * We're not covering the original request, or
3299                  * we won't be able to once we fix the length.
3300                  */
3301                 if (orig_off < align_off ||
3302                     orig_end > align_off + align_alen ||
3303                     align_alen - temp < orig_alen)
3304                         return -EINVAL;
3305                 /*
3306                  * Try to fix it by moving the start up.
3307                  */
3308                 if (align_off + temp <= orig_off) {
3309                         align_alen -= temp;
3310                         align_off += temp;
3311                 }
3312                 /*
3313                  * Try to fix it by moving the end in.
3314                  */
3315                 else if (align_off + align_alen - temp >= orig_end)
3316                         align_alen -= temp;
3317                 /*
3318                  * Set the start to the minimum then trim the length.
3319                  */
3320                 else {
3321                         align_alen -= orig_off - align_off;
3322                         align_off = orig_off;
3323                         align_alen -= align_alen % mp->m_sb.sb_rextsize;
3324                 }
3325                 /*
3326                  * Result doesn't cover the request, fail it.
3327                  */
3328                 if (orig_off < align_off || orig_end > align_off + align_alen)
3329                         return -EINVAL;
3330         } else {
3331                 ASSERT(orig_off >= align_off);
3332                 /* see MAXEXTLEN handling above */
3333                 ASSERT(orig_end <= align_off + align_alen ||
3334                        align_alen + extsz > MAXEXTLEN);
3335         }
3336
3337 #ifdef DEBUG
3338         if (!eof && gotp->br_startoff != NULLFILEOFF)
3339                 ASSERT(align_off + align_alen <= gotp->br_startoff);
3340         if (prevp->br_startoff != NULLFILEOFF)
3341                 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3342 #endif
3343
3344         *lenp = align_alen;
3345         *offp = align_off;
3346         return 0;
3347 }
3348
3349 #define XFS_ALLOC_GAP_UNITS     4       /* gap limit, in multiples of ap->length, checked in xfs_bmap_adjacent() */
3350
3351 void
3352 xfs_bmap_adjacent(
3353         struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3354 {
3355         xfs_fsblock_t   adjust;         /* adjustment to block numbers */
3356         xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3357         xfs_mount_t     *mp;            /* mount point structure */
3358         int             nullfb;         /* true if ap->firstblock isn't set */
3359         int             rt;             /* true if inode is realtime */
3360
3361 #define ISVALID(x,y)    \
3362         (rt ? \
3363                 (x) < mp->m_sb.sb_rblocks : \
3364                 XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3365                 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3366                 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3367
3368         mp = ap->ip->i_mount;
3369         nullfb = *ap->firstblock == NULLFSBLOCK;
3370         rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
3371         fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3372         /*
3373          * If allocating at eof, and there's a previous real block,
3374          * try to use its last block as our starting point.
3375          */
3376         if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3377             !isnullstartblock(ap->prev.br_startblock) &&
3378             ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3379                     ap->prev.br_startblock)) {
3380                 ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3381                 /*
3382                  * Adjust for the gap between prevp and us.
3383                  */
3384                 adjust = ap->offset -
3385                         (ap->prev.br_startoff + ap->prev.br_blockcount);
3386                 if (adjust &&
3387                     ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3388                         ap->blkno += adjust;
3389         }
3390         /*
3391          * If not at eof, then compare the two neighbor blocks.
3392          * Figure out whether either one gives us a good starting point,
3393          * and pick the better one.
3394          */
3395         else if (!ap->eof) {
3396                 xfs_fsblock_t   gotbno;         /* right side block number */
3397                 xfs_fsblock_t   gotdiff=0;      /* right side difference */
3398                 xfs_fsblock_t   prevbno;        /* left side block number */
3399                 xfs_fsblock_t   prevdiff=0;     /* left side difference */
3400
3401                 /*
3402                  * If there's a previous (left) block, select a requested
3403                  * start block based on it.
3404                  */
3405                 if (ap->prev.br_startoff != NULLFILEOFF &&
3406                     !isnullstartblock(ap->prev.br_startblock) &&
3407                     (prevbno = ap->prev.br_startblock +
3408                                ap->prev.br_blockcount) &&
3409                     ISVALID(prevbno, ap->prev.br_startblock)) {
3410                         /*
3411                          * Calculate gap to end of previous block.
3412                          */
3413                         adjust = prevdiff = ap->offset -
3414                                 (ap->prev.br_startoff +
3415                                  ap->prev.br_blockcount);
3416                         /*
3417                          * Figure the startblock based on the previous block's
3418                          * end and the gap size.
3419                          * Heuristic!
3420                          * If the gap is large relative to the piece we're
3421                          * allocating, or using it gives us an invalid block
3422                          * number, then just use the end of the previous block.
3423                          */
3424                         if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3425                             ISVALID(prevbno + prevdiff,
3426                                     ap->prev.br_startblock))
3427                                 prevbno += adjust;
3428                         else
3429                                 prevdiff += adjust;
3430                         /*
3431                          * If the firstblock forbids it, can't use it,
3432                          * must use default.
3433                          */
3434                         if (!rt && !nullfb &&
3435                             XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3436                                 prevbno = NULLFSBLOCK;
3437                 }
3438                 /*
3439                  * No previous block or can't follow it, just default.
3440                  */
3441                 else
3442                         prevbno = NULLFSBLOCK;
3443                 /*
3444                  * If there's a following (right) block, select a requested
3445                  * start block based on it.
3446                  */
3447                 if (!isnullstartblock(ap->got.br_startblock)) {
3448                         /*
3449                          * Calculate gap to start of next block.
3450                          */
3451                         adjust = gotdiff = ap->got.br_startoff - ap->offset;
3452                         /*
3453                          * Figure the startblock based on the next block's
3454                          * start and the gap size.
3455                          */
3456                         gotbno = ap->got.br_startblock;
3457                         /*
3458                          * Heuristic!
3459                          * If the gap is large relative to the piece we're
3460                          * allocating, or using it gives us an invalid block
3461                          * number, then just use the start of the next block
3462                          * offset by our length.
3463                          */
3464                         if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3465                             ISVALID(gotbno - gotdiff, gotbno))
3466                                 gotbno -= adjust;
3467                         else if (ISVALID(gotbno - ap->length, gotbno)) {
3468                                 gotbno -= ap->length;
3469                                 gotdiff += adjust - ap->length;
3470                         } else
3471                                 gotdiff += adjust;
3472                         /*
3473                          * If the firstblock forbids it, can't use it,
3474                          * must use default.
3475                          */
3476                         if (!rt && !nullfb &&
3477                             XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3478                                 gotbno = NULLFSBLOCK;
3479                 }
3480                 /*
3481                  * No next block, just default.
3482                  */
3483                 else
3484                         gotbno = NULLFSBLOCK;
3485                 /*
3486                  * If both valid, pick the better one, else the only good
3487                  * one, else ap->blkno is already set (to 0 or the inode block).
3488                  */
3489                 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3490                         ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3491                 else if (prevbno != NULLFSBLOCK)
3492                         ap->blkno = prevbno;
3493                 else if (gotbno != NULLFSBLOCK)
3494                         ap->blkno = gotbno;
3495         }
3496 #undef ISVALID
3497 }
3498
3499 static int
3500 xfs_bmap_longest_free_extent(
3501         struct xfs_trans        *tp,
3502         xfs_agnumber_t          ag,
3503         xfs_extlen_t            *blen,
3504         int                     *notinit)
3505 {
3506         struct xfs_mount        *mp = tp->t_mountp;
3507         struct xfs_perag        *pag;
3508         xfs_extlen_t            longest;
3509         int                     error = 0;
3510
3511         pag = xfs_perag_get(mp, ag);
3512         if (!pag->pagf_init) {
3513                 error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3514                 if (error)
3515                         goto out;
3516
3517                 if (!pag->pagf_init) {
3518                         *notinit = 1;
3519                         goto out;
3520                 }
3521         }
3522
3523         longest = xfs_alloc_longest_free_extent(mp, pag,
3524                                         xfs_alloc_min_freelist(mp, pag));
3525         if (*blen < longest)
3526                 *blen = longest;
3527
3528 out:
3529         xfs_perag_put(pag);
3530         return error;
3531 }
3532
3533 static void
3534 xfs_bmap_select_minlen(
3535         struct xfs_bmalloca     *ap,
3536         struct xfs_alloc_arg    *args,
3537         xfs_extlen_t            *blen,
3538         int                     notinit)
3539 {
3540         if (notinit || *blen < ap->minlen) {
3541                 /*
3542                  * Since we did a BUF_TRYLOCK above, it is possible that
3543                  * there is space for this request.
3544                  */
3545                 args->minlen = ap->minlen;
3546         } else if (*blen < args->maxlen) {
3547                 /*
3548                  * If the best seen length is less than the request length,
3549                  * use the best as the minimum.
3550                  */
3551                 args->minlen = *blen;
3552         } else {
3553                 /*
3554                  * Otherwise we've seen an extent as big as maxlen, use that
3555                  * as the minimum.
3556                  */
3557                 args->minlen = args->maxlen;
3558         }
3559 }
3560
3561 STATIC int
3562 xfs_bmap_btalloc_nullfb(
3563         struct xfs_bmalloca     *ap,
3564         struct xfs_alloc_arg    *args,
3565         xfs_extlen_t            *blen)
3566 {
3567         struct xfs_mount        *mp = ap->ip->i_mount;
3568         xfs_agnumber_t          ag, startag;
3569         int                     notinit = 0;
3570         int                     error;
3571
3572         args->type = XFS_ALLOCTYPE_START_BNO;
3573         args->total = ap->total;
3574
3575         startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3576         if (startag == NULLAGNUMBER)
3577                 startag = ag = 0;
3578
3579         while (*blen < args->maxlen) {
3580                 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3581                                                      &notinit);
3582                 if (error)
3583                         return error;
3584
3585                 if (++ag == mp->m_sb.sb_agcount)
3586                         ag = 0;
3587                 if (ag == startag)
3588                         break;
3589         }
3590
3591         xfs_bmap_select_minlen(ap, args, blen, notinit);
3592         return 0;
3593 }
3594
3595 STATIC int
3596 xfs_bmap_btalloc_filestreams(
3597         struct xfs_bmalloca     *ap,
3598         struct xfs_alloc_arg    *args,
3599         xfs_extlen_t            *blen)
3600 {
3601         struct xfs_mount        *mp = ap->ip->i_mount;
3602         xfs_agnumber_t          ag;
3603         int                     notinit = 0;
3604         int                     error;
3605
3606         args->type = XFS_ALLOCTYPE_NEAR_BNO;
3607         args->total = ap->total;
3608
3609         ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3610         if (ag == NULLAGNUMBER)
3611                 ag = 0;
3612
3613         error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3614         if (error)
3615                 return error;
3616
3617         if (*blen < args->maxlen) {
3618                 error = xfs_filestream_new_ag(ap, &ag);
3619                 if (error)
3620                         return error;
3621
3622                 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3623                                                      &notinit);
3624                 if (error)
3625                         return error;
3626
3627         }
3628
3629         xfs_bmap_select_minlen(ap, args, blen, notinit);
3630
3631         /*
3632          * Set the failure fallback case to look in the selected AG as stream
3633          * may have moved.
3634          */
3635         ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3636         return 0;
3637 }
3638
/*
 * Allocate an extent for ap->ip through xfs_alloc_vextent().
 *
 * A target block (ap->blkno) is first taken from the filestream AG, the
 * inode's own block or *ap->firstblock, and then refined by
 * xfs_bmap_adjacent().  The allocation arguments are filled in from the
 * extent size hint (user data only), the page size / block size ratio and the
 * mount's stripe geometry.  The allocation is then attempted, and retried
 * with progressively weaker constraints while args.fsbno keeps coming back
 * as NULLFSBLOCK.
 *
 * When a block is obtained, ap->blkno and ap->length describe the new extent,
 * *ap->firstblock is set if it was still NULLFSBLOCK, di_nblocks is increased
 * and the inode core logged, i_delayed_blks is reduced for a converted
 * delayed allocation, and the dquot block count is adjusted.  When nothing
 * could be allocated, ap->blkno is set to NULLFSBLOCK and ap->length to 0.
 *
 * Returns 0 in both of those cases, or the error reported by one of the
 * helpers called below.
 */
3639 STATIC int
3640 xfs_bmap_btalloc(
3641         struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3642 {
3643         xfs_mount_t     *mp;            /* mount point structure */
3644         xfs_alloctype_t atype = 0;      /* type for allocation routines */
3645         xfs_extlen_t    align;          /* minimum allocation alignment */
3646         xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3647         xfs_agnumber_t  ag;             /* AG returned by the filestream lookup */
3648         xfs_alloc_arg_t args;           /* arguments for xfs_alloc_vextent */
3649         xfs_extlen_t    blen;           /* longest free extent found by helpers */
3650         xfs_extlen_t    nextminlen = 0; /* minlen used by the aligned retry */
3651         int             nullfb;         /* true if ap->firstblock isn't set */
3652         int             isaligned;      /* an aligned attempt has been made */
3653         int             tryagain;       /* first attempt was an exact-bno one */
3654         int             error;
3655         int             stripe_align;   /* stripe alignment taken from the mount */
3656
3657         ASSERT(ap->length);
3658
3659         mp = ap->ip->i_mount;
3660
3661         /* stripe alignment for allocation is determined by mount parameters */
3662         stripe_align = 0;
3663         if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3664                 stripe_align = mp->m_swidth;
3665         else if (mp->m_dalign)
3666                 stripe_align = mp->m_dalign;
3667
             /* The extent size hint is only looked up for user data. */
3668         align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
3669         if (unlikely(align)) {
3670                 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3671                                                 align, 0, ap->eof, 0, ap->conv,
3672                                                 &ap->offset, &ap->length);
3673                 ASSERT(!error);
3674                 ASSERT(ap->length);
3675         }
3676
3677
             /*
              * Pick the initial target block: block 0 of the filestream AG
              * (AG 0 if the lookup finds none), the block holding the inode,
              * or the first block already allocated.
              */
3678         nullfb = *ap->firstblock == NULLFSBLOCK;
3679         fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3680         if (nullfb) {
3681                 if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
3682                         ag = xfs_filestream_lookup_ag(ap->ip);
3683                         ag = (ag != NULLAGNUMBER) ? ag : 0;
3684                         ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3685                 } else {
3686                         ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3687                 }
3688         } else
3689                 ap->blkno = *ap->firstblock;
3690
3691         xfs_bmap_adjacent(ap);
3692
3693         /*
3694          * If allowed, use ap->blkno; otherwise must use firstblock since
3695          * it's in the right allocation group.
3696          */
3697         if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
3698                 ;
3699         else
3700                 ap->blkno = *ap->firstblock;
3701         /*
3702          * Normal allocation, done through xfs_alloc_vextent.
3703          */
3704         tryagain = isaligned = 0;
3705         memset(&args, 0, sizeof(args));
3706         args.tp = ap->tp;
3707         args.mp = mp;
3708         args.fsbno = ap->blkno;
3709
3710         /* Trim the allocation back to the maximum an AG can fit. */
3711         args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
3712         args.firstblock = *ap->firstblock;
3713         blen = 0;
3714         if (nullfb) {
3715                 /*
3716                  * Search for an allocation group with a single extent large
3717                  * enough for the request.  If one isn't found, then adjust
3718                  * the minimum allocation size to the largest space found.
3719                  */
3720                 if (ap->userdata && xfs_inode_is_filestream(ap->ip))
3721                         error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3722                 else
3723                         error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3724                 if (error)
3725                         return error;
3726         } else if (ap->flist->xbf_low) {
                     /* xbf_low is set: ask only for ap->minlen blocks in total. */
3727                 if (xfs_inode_is_filestream(ap->ip))
3728                         args.type = XFS_ALLOCTYPE_FIRST_AG;
3729                 else
3730                         args.type = XFS_ALLOCTYPE_START_BNO;
3731                 args.total = args.minlen = ap->minlen;
3732         } else {
3733                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3734                 args.total = ap->total;
3735                 args.minlen = ap->minlen;
3736         }
3737         /* apply extent size hints if obtained earlier */
3738         if (unlikely(align)) {
3739                 args.prod = align;
3740                 if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
3741                         args.mod = (xfs_extlen_t)(args.prod - args.mod);
3742         } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
3743                 args.prod = 1;
3744                 args.mod = 0;
3745         } else {
                     /* Blocks are smaller than a page: prod is blocks per page. */
3746                 args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
3747                 if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
3748                         args.mod = (xfs_extlen_t)(args.prod - args.mod);
3749         }
3750         /*
3751          * If we are not low on available data blocks, and the
3752          * underlying logical volume manager is a stripe, and
3753          * the file offset is zero then try to allocate data
3754          * blocks on stripe unit boundary.
3755          * NOTE: ap->aeof is only set if the allocation length
3756          * is >= the stripe unit and the allocation offset is
3757          * at the end of file.
3758          */
3759         if (!ap->flist->xbf_low && ap->aeof) {
3760                 if (!ap->offset) {
3761                         args.alignment = stripe_align;
3762                         atype = args.type;
3763                         isaligned = 1;
3764                         /*
3765                          * Adjust for alignment
3766                          */
3767                         if (blen > args.alignment && blen <= args.maxlen)
3768                                 args.minlen = blen - args.alignment;
3769                         args.minalignslop = 0;
3770                 } else {
3771                         /*
3772                          * First try an exact bno allocation.
3773                          * If it fails then do a near or start bno
3774                          * allocation with alignment turned on.
3775                          */
3776                         atype = args.type;
3777                         tryagain = 1;
3778                         args.type = XFS_ALLOCTYPE_THIS_BNO;
3779                         args.alignment = 1;
3780                         /*
3781                          * Compute the minlen+alignment for the
3782                          * next case.  Set slop so that the value
3783                          * of minlen+alignment+slop doesn't go up
3784                          * between the calls.
3785                          */
3786                         if (blen > stripe_align && blen <= args.maxlen)
3787                                 nextminlen = blen - stripe_align;
3788                         else
3789                                 nextminlen = args.minlen;
3790                         if (nextminlen + stripe_align > args.minlen + 1)
3791                                 args.minalignslop =
3792                                         nextminlen + stripe_align -
3793                                         args.minlen - 1;
3794                         else
3795                                 args.minalignslop = 0;
3796                 }
3797         } else {
3798                 args.alignment = 1;
3799                 args.minalignslop = 0;
3800         }
3801         args.minleft = ap->minleft;
3802         args.wasdel = ap->wasdel;
3803         args.isfl = 0;
3804         args.userdata = ap->userdata;
             /* The inode is only handed to the allocator for the ZERO flag. */
3805         if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
3806                 args.ip = ap->ip;
3807
             /* First attempt, with the constraints set up above. */
3808         error = xfs_alloc_vextent(&args);
3809         if (error)
3810                 return error;
3811
3812         if (tryagain && args.fsbno == NULLFSBLOCK) {
3813                 /*
3814                  * Exact allocation failed. Now try with alignment
3815                  * turned on.
3816                  */
3817                 args.type = atype;
3818                 args.fsbno = ap->blkno;
3819                 args.alignment = stripe_align;
3820                 args.minlen = nextminlen;
3821                 args.minalignslop = 0;
3822                 isaligned = 1;
3823                 if ((error = xfs_alloc_vextent(&args)))
3824                         return error;
3825         }
3826         if (isaligned && args.fsbno == NULLFSBLOCK) {
3827                 /*
3828                  * allocation failed, so turn off alignment and
3829                  * try again.
3830                  */
3831                 args.type = atype;
3832                 args.fsbno = ap->blkno;
3833                 args.alignment = 0;
3834                 if ((error = xfs_alloc_vextent(&args)))
3835                         return error;
3836         }
             /*
              * Still nothing and minlen was raised above the caller's
              * ap->minlen: drop back to ap->minlen and retry from ap->blkno.
              */
3837         if (args.fsbno == NULLFSBLOCK && nullfb &&
3838             args.minlen > ap->minlen) {
3839                 args.minlen = ap->minlen;
3840                 args.type = XFS_ALLOCTYPE_START_BNO;
3841                 args.fsbno = ap->blkno;
3842                 if ((error = xfs_alloc_vextent(&args)))
3843                         return error;
3844         }
             /*
              * Last fallback: a FIRST_AG request from block 0 with total
              * reduced to ap->minlen and no minleft.  xbf_low is set once
              * this attempt returns without error, whether or not it found
              * space.
              */
3845         if (args.fsbno == NULLFSBLOCK && nullfb) {
3846                 args.fsbno = 0;
3847                 args.type = XFS_ALLOCTYPE_FIRST_AG;
3848                 args.total = ap->minlen;
3849                 args.minleft = 0;
3850                 if ((error = xfs_alloc_vextent(&args)))
3851                         return error;
3852                 ap->flist->xbf_low = 1;
3853         }
3854         if (args.fsbno != NULLFSBLOCK) {
3855                 /*
3856                  * check the allocation happened at the same or higher AG than
3857                  * the first block that was allocated.
3858                  */
3859                 ASSERT(*ap->firstblock == NULLFSBLOCK ||
3860                        XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
3861                        XFS_FSB_TO_AGNO(mp, args.fsbno) ||
3862                        (ap->flist->xbf_low &&
3863                         XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
3864                         XFS_FSB_TO_AGNO(mp, args.fsbno)));
3865
3866                 ap->blkno = args.fsbno;
3867                 if (*ap->firstblock == NULLFSBLOCK)
3868                         *ap->firstblock = args.fsbno;
3869                 ASSERT(nullfb || fb_agno == args.agno ||
3870                        (ap->flist->xbf_low && fb_agno < args.agno));
3871                 ap->length = args.len;
3872                 ap->ip->i_d.di_nblocks += args.len;
3873                 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3874                 if (ap->wasdel)
3875                         ap->ip->i_delayed_blks -= args.len;
3876                 /*
3877                  * Adjust the disk quota also. This was reserved
3878                  * earlier.
3879                  */
3880                 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3881                         ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
3882                                         XFS_TRANS_DQ_BCOUNT,
3883                         (long) args.len);
3884         } else {
                     /* Every attempt failed: report "no extent" to the caller. */
3885                 ap->blkno = NULLFSBLOCK;
3886                 ap->length = 0;
3887         }
3888         return 0;
3889 }
3890
3891 /*
3892  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
3893  * It figures out where to ask the underlying allocator to put the new extent.
3894  */
3895 STATIC int
3896 xfs_bmap_alloc(
3897         struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3898 {
3899         if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
3900                 return xfs_bmap_rtalloc(ap);
3901         return xfs_bmap_btalloc(ap);
3902 }
3903
3904 /*
3905  * Trim the returned map to the required bounds
3906  */
3907 STATIC void
3908 xfs_bmapi_trim_map(
3909         struct xfs_bmbt_irec    *mval,
3910         struct xfs_bmbt_irec    *got,
3911         xfs_fileoff_t           *bno,
3912         xfs_filblks_t           len,
3913         xfs_fileoff_t           obno,
3914         xfs_fileoff_t           end,
3915         int                     n,
3916         int                     flags)
3917 {
3918         if ((flags & XFS_BMAPI_ENTIRE) ||
3919             got->br_startoff + got->br_blockcount <= obno) {
3920                 *mval = *got;
3921                 if (isnullstartblock(got->br_startblock))
3922                         mval->br_startblock = DELAYSTARTBLOCK;
3923                 return;
3924         }
3925
3926         if (obno > *bno)
3927                 *bno = obno;
3928         ASSERT((*bno >= obno) || (n == 0));
3929         ASSERT(*bno < end);
3930         mval->br_startoff = *bno;
3931         if (isnullstartblock(got->br_startblock))
3932                 mval->br_startblock = DELAYSTARTBLOCK;
3933         else
3934                 mval->br_startblock = got->br_startblock +
3935                                         (*bno - got->br_startoff);
3936         /*
3937          * Return the minimum of what we got and what we asked for for
3938          * the length.  We can use the len variable here because it is
3939          * modified below and we could have been there before coming
3940          * here if the first part of the allocation didn't overlap what
3941          * was asked for.
3942          */
3943         mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3944                         got->br_blockcount - (*bno - got->br_startoff));
3945         mval->br_state = got->br_state;
3946         ASSERT(mval->br_blockcount <= len);
3947         return;
3948 }
3949
3950 /*
3951  * Update and validate the extent map to return
3952  */
3953 STATIC void
3954 xfs_bmapi_update_map(
3955         struct xfs_bmbt_irec    **map,
3956         xfs_fileoff_t           *bno,
3957         xfs_filblks_t           *len,
3958         xfs_fileoff_t           obno,
3959         xfs_fileoff_t           end,
3960         int                     *n,
3961         int                     flags)
3962 {
3963         xfs_bmbt_irec_t *mval = *map;
3964
3965         ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3966                ((mval->br_startoff + mval->br_blockcount) <= end));
3967         ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3968                (mval->br_startoff < obno));
3969
3970         *bno = mval->br_startoff + mval->br_blockcount;
3971         *len = end - *bno;
3972         if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3973                 /* update previous map with new information */
3974                 ASSERT(mval->br_startblock == mval[-1].br_startblock);
3975                 ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3976                 ASSERT(mval->br_state == mval[-1].br_state);
3977                 mval[-1].br_blockcount = mval->br_blockcount;
3978                 mval[-1].br_state = mval->br_state;
3979         } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3980                    mval[-1].br_startblock != DELAYSTARTBLOCK &&
3981                    mval[-1].br_startblock != HOLESTARTBLOCK &&
3982                    mval->br_startblock == mval[-1].br_startblock +
3983                                           mval[-1].br_blockcount &&
3984                    ((flags & XFS_BMAPI_IGSTATE) ||
3985                         mval[-1].br_state == mval->br_state)) {
3986                 ASSERT(mval->br_startoff ==
3987                        mval[-1].br_startoff + mval[-1].br_blockcount);
3988                 mval[-1].br_blockcount += mval->br_blockcount;
3989         } else if (*n > 0 &&
3990                    mval->br_startblock == DELAYSTARTBLOCK &&
3991                    mval[-1].br_startblock == DELAYSTARTBLOCK &&
3992                    mval->br_startoff ==
3993                    mval[-1].br_startoff + mval[-1].br_blockcount) {
3994                 mval[-1].br_blockcount += mval->br_blockcount;
3995                 mval[-1].br_state = mval->br_state;
3996         } else if (!((*n == 0) &&
3997                      ((mval->br_startoff + mval->br_blockcount) <=
3998                       obno))) {
3999                 mval++;
4000                 (*n)++;
4001         }
4002         *map = mval;
4003 }
4004
4005 /*
4006  * Map file blocks to filesystem blocks without allocation.
4007  */
4008 int
4009 xfs_bmapi_read(
4010         struct xfs_inode        *ip,
4011         xfs_fileoff_t           bno,
4012         xfs_filblks_t           len,
4013         struct xfs_bmbt_irec    *mval,
4014         int                     *nmap,
4015         int                     flags)
4016 {
4017         struct xfs_mount        *mp = ip->i_mount;
4018         struct xfs_ifork        *ifp;
4019         struct xfs_bmbt_irec    got;
4020         struct xfs_bmbt_irec    prev;
4021         xfs_fileoff_t           obno;
4022         xfs_fileoff_t           end;
4023         xfs_extnum_t            lastx;
4024         int                     error;
4025         int                     eof;
4026         int                     n = 0;
4027         int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4028                                                 XFS_ATTR_FORK : XFS_DATA_FORK;
4029
4030         ASSERT(*nmap >= 1);
4031         ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
4032                            XFS_BMAPI_IGSTATE)));
4033         ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
4034
4035         if (unlikely(XFS_TEST_ERROR(
4036             (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4037              XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4038              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4039                 XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
4040                 return -EFSCORRUPTED;
4041         }
4042
4043         if (XFS_FORCED_SHUTDOWN(mp))
4044                 return -EIO;
4045
4046         XFS_STATS_INC(mp, xs_blk_mapr);
4047
4048         ifp = XFS_IFORK_PTR(ip, whichfork);
4049
4050         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4051                 error = xfs_iread_extents(NULL, ip, whichfork);
4052                 if (error)
4053                         return error;
4054         }
4055
4056         xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
4057         end = bno + len;
4058         obno = bno;
4059
4060         while (bno < end && n < *nmap) {
4061                 /* Reading past eof, act as though there's a hole up to end. */
4062                 if (eof)
4063                         got.br_startoff = end;
4064                 if (got.br_startoff > bno) {
4065                         /* Reading in a hole.  */
4066                         mval->br_startoff = bno;
4067                         mval->br_startblock = HOLESTARTBLOCK;
4068                         mval->br_blockcount =
4069                                 XFS_FILBLKS_MIN(len, got.br_startoff - bno);
4070                         mval->br_state = XFS_EXT_NORM;
4071                         bno += mval->br_blockcount;
4072                         len -= mval->br_blockcount;
4073                         mval++;
4074                         n++;
4075                         continue;
4076                 }
4077
4078                 /* set up the extent map to return. */
4079                 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4080                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4081
4082                 /* If we're done, stop now. */
4083                 if (bno >= end || n >= *nmap)
4084                         break;
4085
4086                 /* Else go on to the next record. */
4087                 if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
4088                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
4089                 else
4090                         eof = 1;
4091         }
4092         *nmap = n;
4093         return 0;
4094 }
4095
4096 STATIC int
4097 xfs_bmapi_reserve_delalloc(
4098         struct xfs_inode        *ip,
4099         xfs_fileoff_t           aoff,
4100         xfs_filblks_t           len,
4101         struct xfs_bmbt_irec    *got,
4102         struct xfs_bmbt_irec    *prev,
4103         xfs_extnum_t            *lastx,
4104         int                     eof)
4105 {
4106         struct xfs_mount        *mp = ip->i_mount;
4107         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
4108         xfs_extlen_t            alen;
4109         xfs_extlen_t            indlen;
4110         char                    rt = XFS_IS_REALTIME_INODE(ip);
4111         xfs_extlen_t            extsz;
4112         int                     error;
4113
4114         alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
4115         if (!eof)
4116                 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4117
4118         /* Figure out the extent size, adjust alen */
4119         extsz = xfs_get_extsz_hint(ip);
4120         if (extsz) {
4121                 error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
4122                                                1, 0, &aoff, &alen);
4123                 ASSERT(!error);
4124         }
4125
4126         if (rt)
4127                 extsz = alen / mp->m_sb.sb_rextsize;
4128
4129         /*
4130          * Make a transaction-less quota reservation for delayed allocation
4131          * blocks.  This number gets adjusted later.  We return if we haven't
4132          * allocated blocks already inside this loop.
4133          */
4134         error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
4135                         rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4136         if (error)
4137                 return error;
4138
4139         /*
4140          * Split changing sb for alen and indlen since they could be coming
4141          * from different places.
4142          */
4143         indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4144         ASSERT(indlen > 0);
4145
4146         if (rt) {
4147                 error = xfs_mod_frextents(mp, -((int64_t)extsz));
4148         } else {
4149                 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4150         }
4151
4152         if (error)
4153                 goto out_unreserve_quota;
4154
4155         error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4156         if (error)
4157                 goto out_unreserve_blocks;
4158
4159
4160         ip->i_delayed_blks += alen;
4161
4162         got->br_startoff = aoff;
4163         got->br_startblock = nullstartblock(indlen);
4164         got->br_blockcount = alen;
4165         got->br_state = XFS_EXT_NORM;
4166         xfs_bmap_add_extent_hole_delay(ip, lastx, got);
4167
4168         /*
4169          * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
4170          * might have merged it into one of the neighbouring ones.
4171          */
4172         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
4173
4174         ASSERT(got->br_startoff <= aoff);
4175         ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
4176         ASSERT(isnullstartblock(got->br_startblock));
4177         ASSERT(got->br_state == XFS_EXT_NORM);
4178         return 0;
4179
4180 out_unreserve_blocks:
4181         if (rt)
4182                 xfs_mod_frextents(mp, extsz);
4183         else
4184                 xfs_mod_fdblocks(mp, alen, false);
4185 out_unreserve_quota:
4186         if (XFS_IS_QUOTA_ON(mp))
4187                 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
4188                                 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4189         return error;
4190 }
4191
4192 /*
4193  * Map file blocks to filesystem blocks, adding delayed allocations as needed.
4194  */
4195 int
4196 xfs_bmapi_delay(
4197         struct xfs_inode        *ip,    /* incore inode */
4198         xfs_fileoff_t           bno,    /* starting file offs. mapped */
4199         xfs_filblks_t           len,    /* length to map in file */
4200         struct xfs_bmbt_irec    *mval,  /* output: map values */
4201         int                     *nmap,  /* i/o: mval size/count */
4202         int                     flags)  /* XFS_BMAPI_... */
4203 {
4204         struct xfs_mount        *mp = ip->i_mount;
4205         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
4206         struct xfs_bmbt_irec    got;    /* current file extent record */
4207         struct xfs_bmbt_irec    prev;   /* previous file extent record */
4208         xfs_fileoff_t           obno;   /* old block number (offset) */
4209         xfs_fileoff_t           end;    /* end of mapped file region */
4210         xfs_extnum_t            lastx;  /* last useful extent number */
4211         int                     eof;    /* we've hit the end of extents */
4212         int                     n = 0;  /* current extent index */
4213         int                     error = 0;
4214
4215         ASSERT(*nmap >= 1);
4216         ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4217         ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
4218         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4219
4220         if (unlikely(XFS_TEST_ERROR(
4221             (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
4222              XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
4223              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4224                 XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
4225                 return -EFSCORRUPTED;
4226         }
4227
4228         if (XFS_FORCED_SHUTDOWN(mp))
4229                 return -EIO;
4230
4231         XFS_STATS_INC(mp, xs_blk_mapw);
4232
4233         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4234                 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
4235                 if (error)
4236                         return error;
4237         }
4238
4239         xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
4240         end = bno + len;
4241         obno = bno;
4242
4243         while (bno < end && n < *nmap) {
4244                 if (eof || got.br_startoff > bno) {
4245                         error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
4246                                                            &prev, &lastx, eof);
4247                         if (error) {
4248                                 if (n == 0) {
4249                                         *nmap = 0;
4250                                         return error;
4251                                 }
4252                                 break;
4253                         }
4254                 }
4255
4256                 /* set up the extent map to return. */
4257                 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4258                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4259
4260                 /* If we're done, stop now. */
4261                 if (bno >= end || n >= *nmap)
4262                         break;
4263
4264                 /* Else go on to the next record. */
4265                 prev = got;
4266                 if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
4267                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
4268                 else
4269                         eof = 1;
4270         }
4271
4272         *nmap = n;
4273         return 0;
4274 }
4275
4276
4277 static int
4278 xfs_bmapi_allocate(
4279         struct xfs_bmalloca     *bma)
4280 {
4281         struct xfs_mount        *mp = bma->ip->i_mount;
4282         int                     whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
4283                                                 XFS_ATTR_FORK : XFS_DATA_FORK;
4284         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4285         int                     tmp_logflags = 0;
4286         int                     error;
4287
4288         ASSERT(bma->length > 0);
4289
4290         /*
4291          * For the wasdelay case, we could also just allocate the stuff asked
4292          * for in this bmap call but that wouldn't be as good.
4293          */
4294         if (bma->wasdel) {
4295                 bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4296                 bma->offset = bma->got.br_startoff;
4297                 if (bma->idx != NULLEXTNUM && bma->idx) {
4298                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
4299                                          &bma->prev);
4300                 }
4301         } else {
4302                 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4303                 if (!bma->eof)
4304                         bma->length = XFS_FILBLKS_MIN(bma->length,
4305                                         bma->got.br_startoff - bma->offset);
4306         }
4307
4308         /*
4309          * Indicate if this is the first user data in the file, or just any
4310          * user data. And if it is userdata, indicate whether it needs to
4311          * be initialised to zero during allocation.
4312          */
4313         if (!(bma->flags & XFS_BMAPI_METADATA)) {
4314                 bma->userdata = (bma->offset == 0) ?
4315                         XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
4316                 if (bma->flags & XFS_BMAPI_ZERO)
4317                         bma->userdata |= XFS_ALLOC_USERDATA_ZERO;
4318         }
4319
4320         bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
4321
4322         /*
4323          * Only want to do the alignment at the eof if it is userdata and
4324          * allocation length is larger than a stripe unit.
4325          */
4326         if (mp->m_dalign && bma->length >= mp->m_dalign &&
4327             !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4328                 error = xfs_bmap_isaeof(bma, whichfork);
4329                 if (error)
4330                         return error;
4331         }
4332
4333         error = xfs_bmap_alloc(bma);
4334         if (error)
4335                 return error;
4336
4337         if (bma->flist->xbf_low)
4338                 bma->minleft = 0;
4339         if (bma->cur)
4340                 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4341         if (bma->blkno == NULLFSBLOCK)
4342                 return 0;
4343         if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4344                 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4345                 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4346                 bma->cur->bc_private.b.flist = bma->flist;
4347         }
4348         /*
4349          * Bump the number of extents we've allocated
4350          * in this call.
4351          */
4352         bma->nallocs++;
4353
4354         if (bma->cur)
4355                 bma->cur->bc_private.b.flags =
4356                         bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
4357
4358         bma->got.br_startoff = bma->offset;
4359         bma->got.br_startblock = bma->blkno;
4360         bma->got.br_blockcount = bma->length;
4361         bma->got.br_state = XFS_EXT_NORM;
4362
4363         /*
4364          * A wasdelay extent has been initialized, so shouldn't be flagged
4365          * as unwritten.
4366          */
4367         if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
4368             xfs_sb_version_hasextflgbit(&mp->m_sb))
4369                 bma->got.br_state = XFS_EXT_UNWRITTEN;
4370
4371         if (bma->wasdel)
4372                 error = xfs_bmap_add_extent_delay_real(bma);
4373         else
4374                 error = xfs_bmap_add_extent_hole_real(bma, whichfork);
4375
4376         bma->logflags |= tmp_logflags;
4377         if (error)
4378                 return error;
4379
4380         /*
4381          * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4382          * or xfs_bmap_add_extent_hole_real might have merged it into one of
4383          * the neighbouring ones.
4384          */
4385         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
4386
4387         ASSERT(bma->got.br_startoff <= bma->offset);
4388         ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4389                bma->offset + bma->length);
4390         ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4391                bma->got.br_state == XFS_EXT_UNWRITTEN);
4392         return 0;
4393 }
4394
4395 STATIC int
4396 xfs_bmapi_convert_unwritten(
4397         struct xfs_bmalloca     *bma,
4398         struct xfs_bmbt_irec    *mval,
4399         xfs_filblks_t           len,
4400         int                     flags)
4401 {
4402         int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4403                                                 XFS_ATTR_FORK : XFS_DATA_FORK;
4404         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4405         int                     tmp_logflags = 0;
4406         int                     error;
4407
4408         /* check if we need to do unwritten->real conversion */
4409         if (mval->br_state == XFS_EXT_UNWRITTEN &&
4410             (flags & XFS_BMAPI_PREALLOC))
4411                 return 0;
4412
4413         /* check if we need to do real->unwritten conversion */
4414         if (mval->br_state == XFS_EXT_NORM &&
4415             (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4416                         (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4417                 return 0;
4418
4419         /*
4420          * Modify (by adding) the state flag, if writing.
4421          */
4422         ASSERT(mval->br_blockcount <= len);
4423         if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4424                 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4425                                         bma->ip, whichfork);
4426                 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4427                 bma->cur->bc_private.b.flist = bma->flist;
4428         }
4429         mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4430                                 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4431
4432         /*
4433          * Before insertion into the bmbt, zero the range being converted
4434          * if required.
4435          */
4436         if (flags & XFS_BMAPI_ZERO) {
4437                 error = xfs_zero_extent(bma->ip, mval->br_startblock,
4438                                         mval->br_blockcount);
4439                 if (error)
4440                         return error;
4441         }
4442
4443         error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
4444                         &bma->cur, mval, bma->firstblock, bma->flist,
4445                         &tmp_logflags);
4446         /*
4447          * Log the inode core unconditionally in the unwritten extent conversion
4448          * path because the conversion might not have done so (e.g., if the
4449          * extent count hasn't changed). We need to make sure the inode is dirty
4450          * in the transaction for the sake of fsync(), even if nothing has
4451          * changed, because fsync() will not force the log for this transaction
4452          * unless it sees the inode pinned.
4453          */
4454         bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4455         if (error)
4456                 return error;
4457
4458         /*
4459          * Update our extent pointer, given that
4460          * xfs_bmap_add_extent_unwritten_real might have merged it into one
4461          * of the neighbouring ones.
4462          */
4463         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
4464
4465         /*
4466          * We may have combined previously unwritten space with written space,
4467          * so generate another request.
4468          */
4469         if (mval->br_blockcount < len)
4470                 return -EAGAIN;
4471         return 0;
4472 }
4473
4474 /*
4475  * Map file blocks to filesystem blocks, and allocate blocks or convert the
4476  * extent state if necessary.  Details behaviour is controlled by the flags
4477  * parameter.  Only allocates blocks from a single allocation group, to avoid
4478  * locking problems.
4479  *
4480  * The returned value in "firstblock" from the first call in a transaction
4481  * must be remembered and presented to subsequent calls in "firstblock".
4482  * An upper bound for the number of blocks to be allocated is supplied to
4483  * the first call in "total"; if no allocation group has that many free
4484  * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
4485  */
4486 int
4487 xfs_bmapi_write(
4488         struct xfs_trans        *tp,            /* transaction pointer */
4489         struct xfs_inode        *ip,            /* incore inode */
4490         xfs_fileoff_t           bno,            /* starting file offs. mapped */
4491         xfs_filblks_t           len,            /* length to map in file */
4492         int                     flags,          /* XFS_BMAPI_... */
4493         xfs_fsblock_t           *firstblock,    /* first allocated block
4494                                                    controls a.g. for allocs */
4495         xfs_extlen_t            total,          /* total blocks needed */
4496         struct xfs_bmbt_irec    *mval,          /* output: map values */
4497         int                     *nmap,          /* i/o: mval size/count */
4498         struct xfs_bmap_free    *flist)         /* i/o: list extents to free */
4499 {
4500         struct xfs_mount        *mp = ip->i_mount;
4501         struct xfs_ifork        *ifp;
4502         struct xfs_bmalloca     bma = { NULL }; /* args for xfs_bmap_alloc */
4503         xfs_fileoff_t           end;            /* end of mapped file region */
4504         int                     eof;            /* after the end of extents */
4505         int                     error;          /* error return */
4506         int                     n;              /* current extent index */
4507         xfs_fileoff_t           obno;           /* old block number (offset) */
4508         int                     whichfork;      /* data or attr fork */
4509         char                    inhole;         /* current location is hole in file */
4510         char                    wasdelay;       /* old extent was delayed */
4511
4512 #ifdef DEBUG
4513         xfs_fileoff_t           orig_bno;       /* original block number value */
4514         int                     orig_flags;     /* original flags arg value */
4515         xfs_filblks_t           orig_len;       /* original value of len arg */
4516         struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
4517         int                     orig_nmap;      /* original value of *nmap */
4518
4519         orig_bno = bno;
4520         orig_len = len;
4521         orig_flags = flags;
4522         orig_mval = mval;
4523         orig_nmap = *nmap;
4524 #endif
4525         whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4526                 XFS_ATTR_FORK : XFS_DATA_FORK;
4527
4528         ASSERT(*nmap >= 1);
4529         ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4530         ASSERT(!(flags & XFS_BMAPI_IGSTATE));
4531         ASSERT(tp != NULL);
4532         ASSERT(len > 0);
4533         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
4534         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4535
4536         /* zeroing is for currently only for data extents, not metadata */
4537         ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4538                         (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4539         /*
4540          * we can allocate unwritten extents or pre-zero allocated blocks,
4541          * but it makes no sense to do both at once. This would result in
4542          * zeroing the unwritten extent twice, but it still being an
4543          * unwritten extent....
4544          */
4545         ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4546                         (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4547
4548         if (unlikely(XFS_TEST_ERROR(
4549             (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4550              XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4551              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4552                 XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
4553                 return -EFSCORRUPTED;
4554         }
4555
4556         if (XFS_FORCED_SHUTDOWN(mp))
4557                 return -EIO;
4558
4559         ifp = XFS_IFORK_PTR(ip, whichfork);
4560
4561         XFS_STATS_INC(mp, xs_blk_mapw);
4562
4563         if (*firstblock == NULLFSBLOCK) {
4564                 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
4565                         bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
4566                 else
4567                         bma.minleft = 1;
4568         } else {
4569                 bma.minleft = 0;
4570         }
4571
4572         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4573                 error = xfs_iread_extents(tp, ip, whichfork);
4574                 if (error)
4575                         goto error0;
4576         }
4577
4578         xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
4579                                 &bma.prev);
4580         n = 0;
4581         end = bno + len;
4582         obno = bno;
4583
4584         bma.tp = tp;
4585         bma.ip = ip;
4586         bma.total = total;
4587         bma.userdata = 0;
4588         bma.flist = flist;
4589         bma.firstblock = firstblock;
4590
4591         while (bno < end && n < *nmap) {
4592                 inhole = eof || bma.got.br_startoff > bno;
4593                 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
4594
4595                 /*
4596                  * First, deal with the hole before the allocated space
4597                  * that we found, if any.
4598                  */
4599                 if (inhole || wasdelay) {
4600                         bma.eof = eof;
4601                         bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4602                         bma.wasdel = wasdelay;
4603                         bma.offset = bno;
4604                         bma.flags = flags;
4605
4606                         /*
4607                          * There's a 32/64 bit type mismatch between the
4608                          * allocation length request (which can be 64 bits in
4609                          * length) and the bma length request, which is
4610                          * xfs_extlen_t and therefore 32 bits. Hence we have to
4611                          * check for 32-bit overflows and handle them here.
4612                          */
4613                         if (len > (xfs_filblks_t)MAXEXTLEN)
4614                                 bma.length = MAXEXTLEN;
4615                         else
4616                                 bma.length = len;
4617
4618                         ASSERT(len > 0);
4619                         ASSERT(bma.length > 0);
4620                         error = xfs_bmapi_allocate(&bma);
4621                         if (error)
4622                                 goto error0;
4623                         if (bma.blkno == NULLFSBLOCK)
4624                                 break;
4625                 }
4626
4627                 /* Deal with the allocated space we found.  */
4628                 xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4629                                                         end, n, flags);
4630
4631                 /* Execute unwritten extent conversion if necessary */
4632                 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4633                 if (error == -EAGAIN)
4634                         continue;
4635                 if (error)
4636                         goto error0;
4637
4638                 /* update the extent map to return */
4639                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4640
4641                 /*
4642                  * If we're done, stop now.  Stop when we've allocated
4643                  * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4644                  * the transaction may get too big.
4645                  */
4646                 if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4647                         break;
4648
4649                 /* Else go on to the next record. */
4650                 bma.prev = bma.got;
4651                 if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
4652                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
4653                                          &bma.got);
4654                 } else
4655                         eof = 1;
4656         }
4657         *nmap = n;
4658
4659         /*
4660          * Transform from btree to extents, give it cur.
4661          */
4662         if (xfs_bmap_wants_extents(ip, whichfork)) {
4663                 int             tmp_logflags = 0;
4664
4665                 ASSERT(bma.cur);
4666                 error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
4667                         &tmp_logflags, whichfork);
4668                 bma.logflags |= tmp_logflags;
4669                 if (error)
4670                         goto error0;
4671         }
4672
4673         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
4674                XFS_IFORK_NEXTENTS(ip, whichfork) >
4675                 XFS_IFORK_MAXEXT(ip, whichfork));
4676         error = 0;
4677 error0:
4678         /*
4679          * Log everything.  Do this after conversion, there's no point in
4680          * logging the extent records if we've converted to btree format.
4681          */
4682         if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
4683             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4684                 bma.logflags &= ~xfs_ilog_fext(whichfork);
4685         else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
4686                  XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
4687                 bma.logflags &= ~xfs_ilog_fbroot(whichfork);
4688         /*
4689          * Log whatever the flags say, even if error.  Otherwise we might miss
4690          * detecting a case where the data is changed, there's an error,
4691          * and it's not logged so we don't shutdown when we should.
4692          */
4693         if (bma.logflags)
4694                 xfs_trans_log_inode(tp, ip, bma.logflags);
4695
4696         if (bma.cur) {
4697                 if (!error) {
4698                         ASSERT(*firstblock == NULLFSBLOCK ||
4699                                XFS_FSB_TO_AGNO(mp, *firstblock) ==
4700                                XFS_FSB_TO_AGNO(mp,
4701                                        bma.cur->bc_private.b.firstblock) ||
4702                                (flist->xbf_low &&
4703                                 XFS_FSB_TO_AGNO(mp, *firstblock) <
4704                                 XFS_FSB_TO_AGNO(mp,
4705                                         bma.cur->bc_private.b.firstblock)));
4706                         *firstblock = bma.cur->bc_private.b.firstblock;
4707                 }
4708                 xfs_btree_del_cursor(bma.cur,
4709                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
4710         }
4711         if (!error)
4712                 xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4713                         orig_nmap, *nmap);
4714         return error;
4715 }
4716
4717 /*
4718  * Called by xfs_bmapi to update file extent records and the btree
4719  * after removing space (or undoing a delayed allocation).
4720  */
4721 STATIC int                              /* error */
4722 xfs_bmap_del_extent(
4723         xfs_inode_t             *ip,    /* incore inode pointer */
4724         xfs_trans_t             *tp,    /* current transaction pointer */
4725         xfs_extnum_t            *idx,   /* extent number to update/delete */
4726         xfs_bmap_free_t         *flist, /* list of extents to be freed */
4727         xfs_btree_cur_t         *cur,   /* if null, not a btree */
4728         xfs_bmbt_irec_t         *del,   /* data to remove from extents */
4729         int                     *logflagsp, /* inode logging flags */
4730         int                     whichfork) /* data or attr fork */
4731 {
4732         xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
4733         xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
4734         xfs_fsblock_t           del_endblock=0; /* first block past del */
4735         xfs_fileoff_t           del_endoff;     /* first offset past del */
4736         int                     delay;  /* current block is delayed allocated */
4737         int                     do_fx;  /* free extent at end of routine */
4738         xfs_bmbt_rec_host_t     *ep;    /* current extent entry pointer */
4739         int                     error;  /* error return value */
4740         int                     flags;  /* inode logging flags */
4741         xfs_bmbt_irec_t         got;    /* current extent entry */
4742         xfs_fileoff_t           got_endoff;     /* first offset past got */
4743         int                     i;      /* temp state */
4744         xfs_ifork_t             *ifp;   /* inode fork pointer */
4745         xfs_mount_t             *mp;    /* mount structure */
4746         xfs_filblks_t           nblks;  /* quota/sb block count */
4747         xfs_bmbt_irec_t         new;    /* new record to be inserted */
4748         /* REFERENCED */
4749         uint                    qfield; /* quota field to update */
4750         xfs_filblks_t           temp;   /* for indirect length calculations */
4751         xfs_filblks_t           temp2;  /* for indirect length calculations */
4752         int                     state = 0;
4753
4754         mp = ip->i_mount;
4755         XFS_STATS_INC(mp, xs_del_exlist);
4756
4757         if (whichfork == XFS_ATTR_FORK)
4758                 state |= BMAP_ATTRFORK;
4759
4760         ifp = XFS_IFORK_PTR(ip, whichfork);
4761         ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
4762                 (uint)sizeof(xfs_bmbt_rec_t)));
4763         ASSERT(del->br_blockcount > 0);
4764         ep = xfs_iext_get_ext(ifp, *idx);
4765         xfs_bmbt_get_all(ep, &got);
4766         ASSERT(got.br_startoff <= del->br_startoff);
4767         del_endoff = del->br_startoff + del->br_blockcount;
4768         got_endoff = got.br_startoff + got.br_blockcount;
4769         ASSERT(got_endoff >= del_endoff);
4770         delay = isnullstartblock(got.br_startblock);
4771         ASSERT(isnullstartblock(del->br_startblock) == delay);
4772         flags = 0;
4773         qfield = 0;
4774         error = 0;
4775         /*
4776          * If deleting a real allocation, must free up the disk space.
4777          */
4778         if (!delay) {
4779                 flags = XFS_ILOG_CORE;
4780                 /*
4781                  * Realtime allocation.  Free it and record di_nblocks update.
4782                  */
4783                 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
4784                         xfs_fsblock_t   bno;
4785                         xfs_filblks_t   len;
4786
4787                         ASSERT(do_mod(del->br_blockcount,
4788                                       mp->m_sb.sb_rextsize) == 0);
4789                         ASSERT(do_mod(del->br_startblock,
4790                                       mp->m_sb.sb_rextsize) == 0);
4791                         bno = del->br_startblock;
4792                         len = del->br_blockcount;
4793                         do_div(bno, mp->m_sb.sb_rextsize);
4794                         do_div(len, mp->m_sb.sb_rextsize);
4795                         error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
4796                         if (error)
4797                                 goto done;
4798                         do_fx = 0;
4799                         nblks = len * mp->m_sb.sb_rextsize;
4800                         qfield = XFS_TRANS_DQ_RTBCOUNT;
4801                 }
4802                 /*
4803                  * Ordinary allocation.
4804                  */
4805                 else {
4806                         do_fx = 1;
4807                         nblks = del->br_blockcount;
4808                         qfield = XFS_TRANS_DQ_BCOUNT;
4809                 }
4810                 /*
4811                  * Set up del_endblock and cur for later.
4812                  */
4813                 del_endblock = del->br_startblock + del->br_blockcount;
4814                 if (cur) {
4815                         if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
4816                                         got.br_startblock, got.br_blockcount,
4817                                         &i)))
4818                                 goto done;
4819                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4820                 }
4821                 da_old = da_new = 0;
4822         } else {
4823                 da_old = startblockval(got.br_startblock);
4824                 da_new = 0;
4825                 nblks = 0;
4826                 do_fx = 0;
4827         }
4828         /*
4829          * Set flag value to use in switch statement.
4830          * Left-contig is 2, right-contig is 1.
4831          */
4832         switch (((got.br_startoff == del->br_startoff) << 1) |
4833                 (got_endoff == del_endoff)) {
4834         case 3:
4835                 /*
4836                  * Matches the whole extent.  Delete the entry.
4837                  */
4838                 xfs_iext_remove(ip, *idx, 1,
4839                                 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
4840                 --*idx;
4841                 if (delay)
4842                         break;
4843
4844                 XFS_IFORK_NEXT_SET(ip, whichfork,
4845                         XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
4846                 flags |= XFS_ILOG_CORE;
4847                 if (!cur) {
4848                         flags |= xfs_ilog_fext(whichfork);
4849                         break;
4850                 }
4851                 if ((error = xfs_btree_delete(cur, &i)))
4852                         goto done;
4853                 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4854                 break;
4855
4856         case 2:
4857                 /*
4858                  * Deleting the first part of the extent.
4859                  */
4860                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4861                 xfs_bmbt_set_startoff(ep, del_endoff);
4862                 temp = got.br_blockcount - del->br_blockcount;
4863                 xfs_bmbt_set_blockcount(ep, temp);
4864                 if (delay) {
4865                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
4866                                 da_old);
4867                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4868                         trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4869                         da_new = temp;
4870                         break;
4871                 }
4872                 xfs_bmbt_set_startblock(ep, del_endblock);
4873                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4874                 if (!cur) {
4875                         flags |= xfs_ilog_fext(whichfork);
4876                         break;
4877                 }
4878                 if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
4879                                 got.br_blockcount - del->br_blockcount,
4880                                 got.br_state)))
4881                         goto done;
4882                 break;
4883
4884         case 1:
4885                 /*
4886                  * Deleting the last part of the extent.
4887                  */
4888                 temp = got.br_blockcount - del->br_blockcount;
4889                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4890                 xfs_bmbt_set_blockcount(ep, temp);
4891                 if (delay) {
4892                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
4893                                 da_old);
4894                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4895                         trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4896                         da_new = temp;
4897                         break;
4898                 }
4899                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4900                 if (!cur) {
4901                         flags |= xfs_ilog_fext(whichfork);
4902                         break;
4903                 }
4904                 if ((error = xfs_bmbt_update(cur, got.br_startoff,
4905                                 got.br_startblock,
4906                                 got.br_blockcount - del->br_blockcount,
4907                                 got.br_state)))
4908                         goto done;
4909                 break;
4910
4911         case 0:
4912                 /*
4913                  * Deleting the middle of the extent.
4914                  */
4915                 temp = del->br_startoff - got.br_startoff;
4916                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4917                 xfs_bmbt_set_blockcount(ep, temp);
4918                 new.br_startoff = del_endoff;
4919                 temp2 = got_endoff - del_endoff;
4920                 new.br_blockcount = temp2;
4921                 new.br_state = got.br_state;
4922                 if (!delay) {
4923                         new.br_startblock = del_endblock;
4924                         flags |= XFS_ILOG_CORE;
4925                         if (cur) {
4926                                 if ((error = xfs_bmbt_update(cur,
4927                                                 got.br_startoff,
4928                                                 got.br_startblock, temp,
4929                                                 got.br_state)))
4930                                         goto done;
4931                                 if ((error = xfs_btree_increment(cur, 0, &i)))
4932                                         goto done;
4933                                 cur->bc_rec.b = new;
4934                                 error = xfs_btree_insert(cur, &i);
4935                                 if (error && error != -ENOSPC)
4936                                         goto done;
4937                                 /*
4938                                  * If get no-space back from btree insert,
4939                                  * it tried a split, and we have a zero
4940                                  * block reservation.
4941                                  * Fix up our state and return the error.
4942                                  */
4943                                 if (error == -ENOSPC) {
4944                                         /*
4945                                          * Reset the cursor, don't trust
4946                                          * it after any insert operation.
4947                                          */
4948                                         if ((error = xfs_bmbt_lookup_eq(cur,
4949                                                         got.br_startoff,
4950                                                         got.br_startblock,
4951                                                         temp, &i)))
4952                                                 goto done;
4953                                         XFS_WANT_CORRUPTED_GOTO(mp,
4954                                                                 i == 1, done);
4955                                         /*
4956                                          * Update the btree record back
4957                                          * to the original value.
4958                                          */
4959                                         if ((error = xfs_bmbt_update(cur,
4960                                                         got.br_startoff,
4961                                                         got.br_startblock,
4962                                                         got.br_blockcount,
4963                                                         got.br_state)))
4964                                                 goto done;
4965                                         /*
4966                                          * Reset the extent record back
4967                                          * to the original value.
4968                                          */
4969                                         xfs_bmbt_set_blockcount(ep,
4970                                                 got.br_blockcount);
4971                                         flags = 0;
4972                                         error = -ENOSPC;
4973                                         goto done;
4974                                 }
4975                                 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4976                         } else
4977                                 flags |= xfs_ilog_fext(whichfork);
4978                         XFS_IFORK_NEXT_SET(ip, whichfork,
4979                                 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
4980                 } else {
4981                         ASSERT(whichfork == XFS_DATA_FORK);
4982                         temp = xfs_bmap_worst_indlen(ip, temp);
4983                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4984                         temp2 = xfs_bmap_worst_indlen(ip, temp2);
4985                         new.br_startblock = nullstartblock((int)temp2);
4986                         da_new = temp + temp2;
4987                         while (da_new > da_old) {
4988                                 if (temp) {
4989                                         temp--;
4990                                         da_new--;
4991                                         xfs_bmbt_set_startblock(ep,
4992                                                 nullstartblock((int)temp));
4993                                 }
4994                                 if (da_new == da_old)
4995                                         break;
4996                                 if (temp2) {
4997                                         temp2--;
4998                                         da_new--;
4999                                         new.br_startblock =
5000                                                 nullstartblock((int)temp2);
5001                                 }
5002                         }
5003                 }
5004                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5005                 xfs_iext_insert(ip, *idx + 1, 1, &new, state);
5006                 ++*idx;
5007                 break;
5008         }
5009         /*
5010          * If we need to, add to list of extents to delete.
5011          */
5012         if (do_fx)
5013                 xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
5014                         mp);
5015         /*
5016          * Adjust inode # blocks in the file.
5017          */
5018         if (nblks)
5019                 ip->i_d.di_nblocks -= nblks;
5020         /*
5021          * Adjust quota data.
5022          */
5023         if (qfield)
5024                 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5025
5026         /*
5027          * Account for change in delayed indirect blocks.
5028          * Nothing to do for disk quota accounting here.
5029          */
5030         ASSERT(da_old >= da_new);
5031         if (da_old > da_new)
5032                 xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
5033 done:
5034         *logflagsp = flags;
5035         return error;
5036 }
5037
5038 /*
5039  * Unmap (remove) blocks from a file.
5040  * If nexts is nonzero then the number of extents to remove is limited to
5041  * that value.  If not all extents in the block range can be removed then
5042  * *done is set.
5043  */
5044 int                                             /* error */
5045 xfs_bunmapi(
5046         xfs_trans_t             *tp,            /* transaction pointer */
5047         struct xfs_inode        *ip,            /* incore inode */
5048         xfs_fileoff_t           bno,            /* starting offset to unmap */
5049         xfs_filblks_t           len,            /* length to unmap in file */
5050         int                     flags,          /* misc flags */
5051         xfs_extnum_t            nexts,          /* number of extents max */
5052         xfs_fsblock_t           *firstblock,    /* first allocated block
5053                                                    controls a.g. for allocs */
5054         xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
5055         int                     *done)          /* set if not done yet */
5056 {
5057         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
5058         xfs_bmbt_irec_t         del;            /* extent being deleted */
5059         int                     eof;            /* is deleting at eof */
5060         xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
5061         int                     error;          /* error return value */
5062         xfs_extnum_t            extno;          /* extent number in list */
5063         xfs_bmbt_irec_t         got;            /* current extent record */
5064         xfs_ifork_t             *ifp;           /* inode fork pointer */
5065         int                     isrt;           /* freeing in rt area */
5066         xfs_extnum_t            lastx;          /* last extent index used */
5067         int                     logflags;       /* transaction logging flags */
5068         xfs_extlen_t            mod;            /* rt extent offset */
5069         xfs_mount_t             *mp;            /* mount structure */
5070         xfs_extnum_t            nextents;       /* number of file extents */
5071         xfs_bmbt_irec_t         prev;           /* previous extent record */
5072         xfs_fileoff_t           start;          /* first file offset deleted */
5073         int                     tmp_logflags;   /* partial logging flags */
5074         int                     wasdel;         /* was a delayed alloc extent */
5075         int                     whichfork;      /* data or attribute fork */
5076         xfs_fsblock_t           sum;
5077
5078         trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
5079
5080         whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
5081                 XFS_ATTR_FORK : XFS_DATA_FORK;
5082         ifp = XFS_IFORK_PTR(ip, whichfork);
5083         if (unlikely(
5084             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5085             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5086                 XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5087                                  ip->i_mount);
5088                 return -EFSCORRUPTED;
5089         }
5090         mp = ip->i_mount;
5091         if (XFS_FORCED_SHUTDOWN(mp))
5092                 return -EIO;
5093
5094         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5095         ASSERT(len > 0);
5096         ASSERT(nexts >= 0);
5097
5098         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5099             (error = xfs_iread_extents(tp, ip, whichfork)))
5100                 return error;
5101         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5102         if (nextents == 0) {
5103                 *done = 1;
5104                 return 0;
5105         }
5106         XFS_STATS_INC(mp, xs_blk_unmap);
5107         isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5108         start = bno;
5109         bno = start + len - 1;
5110         ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5111                 &prev);
5112
5113         /*
5114          * Check to see if the given block number is past the end of the
5115          * file, back up to the last block if so...
5116          */
5117         if (eof) {
5118                 ep = xfs_iext_get_ext(ifp, --lastx);
5119                 xfs_bmbt_get_all(ep, &got);
5120                 bno = got.br_startoff + got.br_blockcount - 1;
5121         }
5122         logflags = 0;
5123         if (ifp->if_flags & XFS_IFBROOT) {
5124                 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5125                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5126                 cur->bc_private.b.firstblock = *firstblock;
5127                 cur->bc_private.b.flist = flist;
5128                 cur->bc_private.b.flags = 0;
5129         } else
5130                 cur = NULL;
5131
5132         if (isrt) {
5133                 /*
5134                  * Synchronize by locking the bitmap inode.
5135                  */
5136                 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
5137                 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5138         }
5139
5140         extno = 0;
5141         while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
5142                (nexts == 0 || extno < nexts)) {
5143                 /*
5144                  * Is the found extent after a hole in which bno lives?
5145                  * Just back up to the previous extent, if so.
5146                  */
5147                 if (got.br_startoff > bno) {
5148                         if (--lastx < 0)
5149                                 break;
5150                         ep = xfs_iext_get_ext(ifp, lastx);
5151                         xfs_bmbt_get_all(ep, &got);
5152                 }
5153                 /*
5154                  * Is the last block of this extent before the range
5155                  * we're supposed to delete?  If so, we're done.
5156                  */
5157                 bno = XFS_FILEOFF_MIN(bno,
5158                         got.br_startoff + got.br_blockcount - 1);
5159                 if (bno < start)
5160                         break;
5161                 /*
5162                  * Then deal with the (possibly delayed) allocated space
5163                  * we found.
5164                  */
5165                 ASSERT(ep != NULL);
5166                 del = got;
5167                 wasdel = isnullstartblock(del.br_startblock);
5168                 if (got.br_startoff < start) {
5169                         del.br_startoff = start;
5170                         del.br_blockcount -= start - got.br_startoff;
5171                         if (!wasdel)
5172                                 del.br_startblock += start - got.br_startoff;
5173                 }
5174                 if (del.br_startoff + del.br_blockcount > bno + 1)
5175                         del.br_blockcount = bno + 1 - del.br_startoff;
5176                 sum = del.br_startblock + del.br_blockcount;
5177                 if (isrt &&
5178                     (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
5179                         /*
5180                          * Realtime extent not lined up at the end.
5181                          * The extent could have been split into written
5182                          * and unwritten pieces, or we could just be
5183                          * unmapping part of it.  But we can't really
5184                          * get rid of part of a realtime extent.
5185                          */
5186                         if (del.br_state == XFS_EXT_UNWRITTEN ||
5187                             !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5188                                 /*
5189                                  * This piece is unwritten, or we're not
5190                                  * using unwritten extents.  Skip over it.
5191                                  */
5192                                 ASSERT(bno >= mod);
5193                                 bno -= mod > del.br_blockcount ?
5194                                         del.br_blockcount : mod;
5195                                 if (bno < got.br_startoff) {
5196                                         if (--lastx >= 0)
5197                                                 xfs_bmbt_get_all(xfs_iext_get_ext(
5198                                                         ifp, lastx), &got);
5199                                 }
5200                                 continue;
5201                         }
5202                         /*
5203                          * It's written, turn it unwritten.
5204                          * This is better than zeroing it.
5205                          */
5206                         ASSERT(del.br_state == XFS_EXT_NORM);
5207                         ASSERT(xfs_trans_get_block_res(tp) > 0);
5208                         /*
5209                          * If this spans a realtime extent boundary,
5210                          * chop it back to the start of the one we end at.
5211                          */
5212                         if (del.br_blockcount > mod) {
5213                                 del.br_startoff += del.br_blockcount - mod;
5214                                 del.br_startblock += del.br_blockcount - mod;
5215                                 del.br_blockcount = mod;
5216                         }
5217                         del.br_state = XFS_EXT_UNWRITTEN;
5218                         error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5219                                         &lastx, &cur, &del, firstblock, flist,
5220                                         &logflags);
5221                         if (error)
5222                                 goto error0;
5223                         goto nodelete;
5224                 }
5225                 if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
5226                         /*
5227                          * Realtime extent is lined up at the end but not
5228                          * at the front.  We'll get rid of full extents if
5229                          * we can.
5230                          */
5231                         mod = mp->m_sb.sb_rextsize - mod;
5232                         if (del.br_blockcount > mod) {
5233                                 del.br_blockcount -= mod;
5234                                 del.br_startoff += mod;
5235                                 del.br_startblock += mod;
5236                         } else if ((del.br_startoff == start &&
5237                                     (del.br_state == XFS_EXT_UNWRITTEN ||
5238                                      xfs_trans_get_block_res(tp) == 0)) ||
5239                                    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5240                                 /*
5241                                  * Can't make it unwritten.  There isn't
5242                                  * a full extent here so just skip it.
5243                                  */
5244                                 ASSERT(bno >= del.br_blockcount);
5245                                 bno -= del.br_blockcount;
5246                                 if (got.br_startoff > bno) {
5247                                         if (--lastx >= 0) {
5248                                                 ep = xfs_iext_get_ext(ifp,
5249                                                                       lastx);
5250                                                 xfs_bmbt_get_all(ep, &got);
5251                                         }
5252                                 }
5253                                 continue;
5254                         } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5255                                 /*
5256                                  * This one is already unwritten.
5257                                  * It must have a written left neighbor.
5258                                  * Unwrite the killed part of that one and
5259                                  * try again.
5260                                  */
5261                                 ASSERT(lastx > 0);
5262                                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
5263                                                 lastx - 1), &prev);
5264                                 ASSERT(prev.br_state == XFS_EXT_NORM);
5265                                 ASSERT(!isnullstartblock(prev.br_startblock));
5266                                 ASSERT(del.br_startblock ==
5267                                        prev.br_startblock + prev.br_blockcount);
5268                                 if (prev.br_startoff < start) {
5269                                         mod = start - prev.br_startoff;
5270                                         prev.br_blockcount -= mod;
5271                                         prev.br_startblock += mod;
5272                                         prev.br_startoff = start;
5273                                 }
5274                                 prev.br_state = XFS_EXT_UNWRITTEN;
5275                                 lastx--;
5276                                 error = xfs_bmap_add_extent_unwritten_real(tp,
5277                                                 ip, &lastx, &cur, &prev,
5278                                                 firstblock, flist, &logflags);
5279                                 if (error)
5280                                         goto error0;
5281                                 goto nodelete;
5282                         } else {
5283                                 ASSERT(del.br_state == XFS_EXT_NORM);
5284                                 del.br_state = XFS_EXT_UNWRITTEN;
5285                                 error = xfs_bmap_add_extent_unwritten_real(tp,
5286                                                 ip, &lastx, &cur, &del,
5287                                                 firstblock, flist, &logflags);
5288                                 if (error)
5289                                         goto error0;
5290                                 goto nodelete;
5291                         }
5292                 }
5293                 if (wasdel) {
5294                         ASSERT(startblockval(del.br_startblock) > 0);
5295                         /* Update realtime/data freespace, unreserve quota */
5296                         if (isrt) {
5297                                 xfs_filblks_t rtexts;
5298
5299                                 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5300                                 do_div(rtexts, mp->m_sb.sb_rextsize);
5301                                 xfs_mod_frextents(mp, (int64_t)rtexts);
5302                                 (void)xfs_trans_reserve_quota_nblks(NULL,
5303                                         ip, -((long)del.br_blockcount), 0,
5304                                         XFS_QMOPT_RES_RTBLKS);
5305                         } else {
5306                                 xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
5307                                                  false);
5308                                 (void)xfs_trans_reserve_quota_nblks(NULL,
5309                                         ip, -((long)del.br_blockcount), 0,
5310                                         XFS_QMOPT_RES_REGBLKS);
5311                         }
5312                         ip->i_delayed_blks -= del.br_blockcount;
5313                         if (cur)
5314                                 cur->bc_private.b.flags |=
5315                                         XFS_BTCUR_BPRV_WASDEL;
5316                 } else if (cur)
5317                         cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
5318                 /*
5319                  * If it's the case where the directory code is running
5320                  * with no block reservation, and the deleted block is in
5321                  * the middle of its extent, and the resulting insert
5322                  * of an extent would cause transformation to btree format,
5323                  * then reject it.  The calling code will then swap
5324                  * blocks around instead.
5325                  * We have to do this now, rather than waiting for the
5326                  * conversion to btree format, since the transaction
5327                  * will be dirty.
5328                  */
5329                 if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
5330                     XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
5331                     XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
5332                         XFS_IFORK_MAXEXT(ip, whichfork) &&
5333                     del.br_startoff > got.br_startoff &&
5334                     del.br_startoff + del.br_blockcount <
5335                     got.br_startoff + got.br_blockcount) {
5336                         error = -ENOSPC;
5337                         goto error0;
5338                 }
5339                 error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
5340                                 &tmp_logflags, whichfork);
5341                 logflags |= tmp_logflags;
5342                 if (error)
5343                         goto error0;
5344                 bno = del.br_startoff - 1;
5345 nodelete:
5346                 /*
5347                  * If not done go on to the next (previous) record.
5348                  */
5349                 if (bno != (xfs_fileoff_t)-1 && bno >= start) {
5350                         if (lastx >= 0) {
5351                                 ep = xfs_iext_get_ext(ifp, lastx);
5352                                 if (xfs_bmbt_get_startoff(ep) > bno) {
5353                                         if (--lastx >= 0)
5354                                                 ep = xfs_iext_get_ext(ifp,
5355                                                                       lastx);
5356                                 }
5357                                 xfs_bmbt_get_all(ep, &got);
5358                         }
5359                         extno++;
5360                 }
5361         }
5362         *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
5363
5364         /*
5365          * Convert to a btree if necessary.
5366          */
5367         if (xfs_bmap_needs_btree(ip, whichfork)) {
5368                 ASSERT(cur == NULL);
5369                 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
5370                         &cur, 0, &tmp_logflags, whichfork);
5371                 logflags |= tmp_logflags;
5372                 if (error)
5373                         goto error0;
5374         }
5375         /*
5376          * transform from btree to extents, give it cur
5377          */
5378         else if (xfs_bmap_wants_extents(ip, whichfork)) {
5379                 ASSERT(cur != NULL);
5380                 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
5381                         whichfork);
5382                 logflags |= tmp_logflags;
5383                 if (error)
5384                         goto error0;
5385         }
5386         /*
5387          * transform from extents to local?
5388          */
5389         error = 0;
5390 error0:
5391         /*
5392          * Log everything.  Do this after conversion, there's no point in
5393          * logging the extent records if we've converted to btree format.
5394          */
5395         if ((logflags & xfs_ilog_fext(whichfork)) &&
5396             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5397                 logflags &= ~xfs_ilog_fext(whichfork);
5398         else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5399                  XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5400                 logflags &= ~xfs_ilog_fbroot(whichfork);
5401         /*
5402          * Log inode even in the error case, if the transaction
5403          * is dirty we'll need to shut down the filesystem.
5404          */
5405         if (logflags)
5406                 xfs_trans_log_inode(tp, ip, logflags);
5407         if (cur) {
5408                 if (!error) {
5409                         *firstblock = cur->bc_private.b.firstblock;
5410                         cur->bc_private.b.allocated = 0;
5411                 }
5412                 xfs_btree_del_cursor(cur,
5413                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5414         }
5415         return error;
5416 }
5417
5418 /*
5419  * Determine whether an extent shift can be accomplished by a merge with the
5420  * extent that precedes the target hole of the shift.
5421  */
5422 STATIC bool
5423 xfs_bmse_can_merge(
5424         struct xfs_bmbt_irec    *left,  /* preceding extent */
5425         struct xfs_bmbt_irec    *got,   /* current extent to shift */
5426         xfs_fileoff_t           shift)  /* shift fsb */
5427 {
5428         xfs_fileoff_t           startoff;
5429
5430         startoff = got->br_startoff - shift;
5431
5432         /*
5433          * The extent, once shifted, must be adjacent in-file and on-disk with
5434          * the preceding extent.
5435          */
5436         if ((left->br_startoff + left->br_blockcount != startoff) ||
5437             (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5438             (left->br_state != got->br_state) ||
5439             (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5440                 return false;
5441
5442         return true;
5443 }
5444
5445 /*
5446  * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5447  * hole in the file. If an extent shift would result in the extent being fully
5448  * adjacent to the extent that currently precedes the hole, we can merge with
5449  * the preceding extent rather than do the shift.
5450  *
5451  * This function assumes the caller has verified a shift-by-merge is possible
5452  * with the provided extents via xfs_bmse_can_merge().
5453  */
5454 STATIC int
5455 xfs_bmse_merge(
5456         struct xfs_inode                *ip,
5457         int                             whichfork,
5458         xfs_fileoff_t                   shift,          /* shift fsb */
5459         int                             current_ext,    /* idx of gotp */
5460         struct xfs_bmbt_rec_host        *gotp,          /* extent to shift */
5461         struct xfs_bmbt_rec_host        *leftp,         /* preceding extent */
5462         struct xfs_btree_cur            *cur,
5463         int                             *logflags)      /* output */
5464 {
5465         struct xfs_bmbt_irec            got;
5466         struct xfs_bmbt_irec            left;
5467         xfs_filblks_t                   blockcount;
5468         int                             error, i;
5469         struct xfs_mount                *mp = ip->i_mount;
5470
5471         xfs_bmbt_get_all(gotp, &got);
5472         xfs_bmbt_get_all(leftp, &left);
5473         blockcount = left.br_blockcount + got.br_blockcount;
5474
5475         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5476         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5477         ASSERT(xfs_bmse_can_merge(&left, &got, shift));
5478
5479         /*
5480          * Merge the in-core extents. Note that the host record pointers and
5481          * current_ext index are invalid once the extent has been removed via
5482          * xfs_iext_remove().
5483          */
5484         xfs_bmbt_set_blockcount(leftp, blockcount);
5485         xfs_iext_remove(ip, current_ext, 1, 0);
5486
5487         /*
5488          * Update the on-disk extent count, the btree if necessary and log the
5489          * inode.
5490          */
5491         XFS_IFORK_NEXT_SET(ip, whichfork,
5492                            XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5493         *logflags |= XFS_ILOG_CORE;
5494         if (!cur) {
5495                 *logflags |= XFS_ILOG_DEXT;
5496                 return 0;
5497         }
5498
5499         /* lookup and remove the extent to merge */
5500         error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
5501                                    got.br_blockcount, &i);
5502         if (error)
5503                 return error;
5504         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5505
5506         error = xfs_btree_delete(cur, &i);
5507         if (error)
5508                 return error;
5509         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5510
5511         /* lookup and update size of the previous extent */
5512         error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
5513                                    left.br_blockcount, &i);
5514         if (error)
5515                 return error;
5516         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5517
5518         left.br_blockcount = blockcount;
5519
5520         return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
5521                                left.br_blockcount, left.br_state);
5522 }
5523
5524 /*
5525  * Shift a single extent.
5526  */
5527 STATIC int
5528 xfs_bmse_shift_one(
5529         struct xfs_inode                *ip,
5530         int                             whichfork,
5531         xfs_fileoff_t                   offset_shift_fsb,
5532         int                             *current_ext,
5533         struct xfs_bmbt_rec_host        *gotp,
5534         struct xfs_btree_cur            *cur,
5535         int                             *logflags,
5536         enum shift_direction            direction)
5537 {
5538         struct xfs_ifork                *ifp;
5539         struct xfs_mount                *mp;
5540         xfs_fileoff_t                   startoff;
5541         struct xfs_bmbt_rec_host        *adj_irecp;
5542         struct xfs_bmbt_irec            got;
5543         struct xfs_bmbt_irec            adj_irec;
5544         int                             error;
5545         int                             i;
5546         int                             total_extents;
5547
5548         mp = ip->i_mount;
5549         ifp = XFS_IFORK_PTR(ip, whichfork);
5550         total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5551
5552         xfs_bmbt_get_all(gotp, &got);
5553
5554         /* delalloc extents should be prevented by caller */
5555         XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
5556
5557         if (direction == SHIFT_LEFT) {
5558                 startoff = got.br_startoff - offset_shift_fsb;
5559
5560                 /*
5561                  * Check for merge if we've got an extent to the left,
5562                  * otherwise make sure there's enough room at the start
5563                  * of the file for the shift.
5564                  */
5565                 if (!*current_ext) {
5566                         if (got.br_startoff < offset_shift_fsb)
5567                                 return -EINVAL;
5568                         goto update_current_ext;
5569                 }
5570                 /*
5571                  * grab the left extent and check for a large
5572                  * enough hole.
5573                  */
5574                 adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
5575                 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5576
5577                 if (startoff <
5578                     adj_irec.br_startoff + adj_irec.br_blockcount)
5579                         return -EINVAL;
5580
5581                 /* check whether to merge the extent or shift it down */
5582                 if (xfs_bmse_can_merge(&adj_irec, &got,
5583                                        offset_shift_fsb)) {
5584                         return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5585                                               *current_ext, gotp, adj_irecp,
5586                                               cur, logflags);
5587                 }
5588         } else {
5589                 startoff = got.br_startoff + offset_shift_fsb;
5590                 /* nothing to move if this is the last extent */
5591                 if (*current_ext >= (total_extents - 1))
5592                         goto update_current_ext;
5593                 /*
5594                  * If this is not the last extent in the file, make sure there
5595                  * is enough room between current extent and next extent for
5596                  * accommodating the shift.
5597                  */
5598                 adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
5599                 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5600                 if (startoff + got.br_blockcount > adj_irec.br_startoff)
5601                         return -EINVAL;
5602                 /*
5603                  * Unlike a left shift (which involves a hole punch),
5604                  * a right shift does not modify extent neighbors
5605                  * in any way. We should never find mergeable extents
5606                  * in this scenario. Check anyways and warn if we
5607                  * encounter two extents that could be one.
5608                  */
5609                 if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
5610                         WARN_ON_ONCE(1);
5611         }
5612         /*
5613          * Increment the extent index for the next iteration, update the start
5614          * offset of the in-core extent and update the btree if applicable.
5615          */
5616 update_current_ext:
5617         if (direction == SHIFT_LEFT)
5618                 (*current_ext)++;
5619         else
5620                 (*current_ext)--;
5621         xfs_bmbt_set_startoff(gotp, startoff);
5622         *logflags |= XFS_ILOG_CORE;
5623         if (!cur) {
5624                 *logflags |= XFS_ILOG_DEXT;
5625                 return 0;
5626         }
5627
5628         error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
5629                                    got.br_blockcount, &i);
5630         if (error)
5631                 return error;
5632         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5633
5634         got.br_startoff = startoff;
5635         return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
5636                                got.br_blockcount, got.br_state);
5637 }
5638
5639 /*
5640  * Shift extent records to the left/right to cover/create a hole.
5641  *
5642  * The maximum number of extents to be shifted in a single operation is
5643  * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
5644  * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
5645  * is the length by which each extent is shifted. If there is no hole to shift
5646  * the extents into, this will be considered invalid operation and we abort
5647  * immediately.
5648  */
5649 int
5650 xfs_bmap_shift_extents(
5651         struct xfs_trans        *tp,
5652         struct xfs_inode        *ip,
5653         xfs_fileoff_t           *next_fsb,
5654         xfs_fileoff_t           offset_shift_fsb,
5655         int                     *done,
5656         xfs_fileoff_t           stop_fsb,
5657         xfs_fsblock_t           *firstblock,
5658         struct xfs_bmap_free    *flist,
5659         enum shift_direction    direction,
5660         int                     num_exts)
5661 {
5662         struct xfs_btree_cur            *cur = NULL;
5663         struct xfs_bmbt_rec_host        *gotp;
5664         struct xfs_bmbt_irec            got;
5665         struct xfs_mount                *mp = ip->i_mount;
5666         struct xfs_ifork                *ifp;
5667         xfs_extnum_t                    nexts = 0;
5668         xfs_extnum_t                    current_ext;
5669         xfs_extnum_t                    total_extents;
5670         xfs_extnum_t                    stop_extent;
5671         int                             error = 0;
5672         int                             whichfork = XFS_DATA_FORK;
5673         int                             logflags = 0;
5674
5675         if (unlikely(XFS_TEST_ERROR(
5676             (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5677              XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5678              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5679                 XFS_ERROR_REPORT("xfs_bmap_shift_extents",
5680                                  XFS_ERRLEVEL_LOW, mp);
5681                 return -EFSCORRUPTED;
5682         }
5683
5684         if (XFS_FORCED_SHUTDOWN(mp))
5685                 return -EIO;
5686
5687         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5688         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5689         ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
5690         ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
5691
5692         ifp = XFS_IFORK_PTR(ip, whichfork);
5693         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5694                 /* Read in all the extents */
5695                 error = xfs_iread_extents(tp, ip, whichfork);
5696                 if (error)
5697                         return error;
5698         }
5699
5700         if (ifp->if_flags & XFS_IFBROOT) {
5701                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5702                 cur->bc_private.b.firstblock = *firstblock;
5703                 cur->bc_private.b.flist = flist;
5704                 cur->bc_private.b.flags = 0;
5705         }
5706
5707         /*
5708          * There may be delalloc extents in the data fork before the range we
5709          * are collapsing out, so we cannot use the count of real extents here.
5710          * Instead we have to calculate it from the incore fork.
5711          */
5712         total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5713         if (total_extents == 0) {
5714                 *done = 1;
5715                 goto del_cursor;
5716         }
5717
5718         /*
5719          * In case of first right shift, we need to initialize next_fsb
5720          */
5721         if (*next_fsb == NULLFSBLOCK) {
5722                 gotp = xfs_iext_get_ext(ifp, total_extents - 1);
5723                 xfs_bmbt_get_all(gotp, &got);
5724                 *next_fsb = got.br_startoff;
5725                 if (stop_fsb > *next_fsb) {
5726                         *done = 1;
5727                         goto del_cursor;
5728                 }
5729         }
5730
5731         /* Lookup the extent index at which we have to stop */
5732         if (direction == SHIFT_RIGHT) {
5733                 gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
5734                 /* Make stop_extent exclusive of shift range */
5735                 stop_extent--;
5736         } else
5737                 stop_extent = total_extents;
5738
5739         /*
5740          * Look up the extent index for the fsb where we start shifting. We can
5741          * henceforth iterate with current_ext as extent list changes are locked
5742          * out via ilock.
5743          *
5744          * gotp can be null in 2 cases: 1) if there are no extents or 2)
5745          * *next_fsb lies in a hole beyond which there are no extents. Either
5746          * way, we are done.
5747          */
5748         gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
5749         if (!gotp) {
5750                 *done = 1;
5751                 goto del_cursor;
5752         }
5753
5754         /* some sanity checking before we finally start shifting extents */
5755         if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
5756              (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
5757                 error = -EIO;
5758                 goto del_cursor;
5759         }
5760
5761         while (nexts++ < num_exts) {
5762                 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
5763                                            &current_ext, gotp, cur, &logflags,
5764                                            direction);
5765                 if (error)
5766                         goto del_cursor;
5767                 /*
5768                  * If there was an extent merge during the shift, the extent
5769                  * count can change. Update the total and grade the next record.
5770                  */
5771                 if (direction == SHIFT_LEFT) {
5772                         total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5773                         stop_extent = total_extents;
5774                 }
5775
5776                 if (current_ext == stop_extent) {
5777                         *done = 1;
5778                         *next_fsb = NULLFSBLOCK;
5779                         break;
5780                 }
5781                 gotp = xfs_iext_get_ext(ifp, current_ext);
5782         }
5783
5784         if (!*done) {
5785                 xfs_bmbt_get_all(gotp, &got);
5786                 *next_fsb = got.br_startoff;
5787         }
5788
5789 del_cursor:
5790         if (cur)
5791                 xfs_btree_del_cursor(cur,
5792                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5793
5794         if (logflags)
5795                 xfs_trans_log_inode(tp, ip, logflags);
5796
5797         return error;
5798 }
5799
5800 /*
5801  * Splits an extent into two extents at split_fsb block such that it is
5802  * the first block of the current_ext. @current_ext is a target extent
5803  * to be split. @split_fsb is a block where the extents is split.
5804  * If split_fsb lies in a hole or the first block of extents, just return 0.
5805  */
5806 STATIC int
5807 xfs_bmap_split_extent_at(
5808         struct xfs_trans        *tp,
5809         struct xfs_inode        *ip,
5810         xfs_fileoff_t           split_fsb,
5811         xfs_fsblock_t           *firstfsb,
5812         struct xfs_bmap_free    *free_list)
5813 {
5814         int                             whichfork = XFS_DATA_FORK;
5815         struct xfs_btree_cur            *cur = NULL;
5816         struct xfs_bmbt_rec_host        *gotp;
5817         struct xfs_bmbt_irec            got;
5818         struct xfs_bmbt_irec            new; /* split extent */
5819         struct xfs_mount                *mp = ip->i_mount;
5820         struct xfs_ifork                *ifp;
5821         xfs_fsblock_t                   gotblkcnt; /* new block count for got */
5822         xfs_extnum_t                    current_ext;
5823         int                             error = 0;
5824         int                             logflags = 0;
5825         int                             i = 0;
5826
5827         if (unlikely(XFS_TEST_ERROR(
5828             (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5829              XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5830              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5831                 XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
5832                                  XFS_ERRLEVEL_LOW, mp);
5833                 return -EFSCORRUPTED;
5834         }
5835
5836         if (XFS_FORCED_SHUTDOWN(mp))
5837                 return -EIO;
5838
5839         ifp = XFS_IFORK_PTR(ip, whichfork);
5840         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5841                 /* Read in all the extents */
5842                 error = xfs_iread_extents(tp, ip, whichfork);
5843                 if (error)
5844                         return error;
5845         }
5846
5847         /*
5848          * gotp can be null in 2 cases: 1) if there are no extents
5849          * or 2) split_fsb lies in a hole beyond which there are
5850          * no extents. Either way, we are done.
5851          */
5852         gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
5853         if (!gotp)
5854                 return 0;
5855
5856         xfs_bmbt_get_all(gotp, &got);
5857
5858         /*
5859          * Check split_fsb lies in a hole or the start boundary offset
5860          * of the extent.
5861          */
5862         if (got.br_startoff >= split_fsb)
5863                 return 0;
5864
5865         gotblkcnt = split_fsb - got.br_startoff;
5866         new.br_startoff = split_fsb;
5867         new.br_startblock = got.br_startblock + gotblkcnt;
5868         new.br_blockcount = got.br_blockcount - gotblkcnt;
5869         new.br_state = got.br_state;
5870
5871         if (ifp->if_flags & XFS_IFBROOT) {
5872                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5873                 cur->bc_private.b.firstblock = *firstfsb;
5874                 cur->bc_private.b.flist = free_list;
5875                 cur->bc_private.b.flags = 0;
5876                 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5877                                 got.br_startblock,
5878                                 got.br_blockcount,
5879                                 &i);
5880                 if (error)
5881                         goto del_cursor;
5882                 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5883         }
5884
5885         xfs_bmbt_set_blockcount(gotp, gotblkcnt);
5886         got.br_blockcount = gotblkcnt;
5887
5888         logflags = XFS_ILOG_CORE;
5889         if (cur) {
5890                 error = xfs_bmbt_update(cur, got.br_startoff,
5891                                 got.br_startblock,
5892                                 got.br_blockcount,
5893                                 got.br_state);
5894                 if (error)
5895                         goto del_cursor;
5896         } else
5897                 logflags |= XFS_ILOG_DEXT;
5898
5899         /* Add new extent */
5900         current_ext++;
5901         xfs_iext_insert(ip, current_ext, 1, &new, 0);
5902         XFS_IFORK_NEXT_SET(ip, whichfork,
5903                            XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5904
5905         if (cur) {
5906                 error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
5907                                 new.br_startblock, new.br_blockcount,
5908                                 &i);
5909                 if (error)
5910                         goto del_cursor;
5911                 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
5912                 cur->bc_rec.b.br_state = new.br_state;
5913
5914                 error = xfs_btree_insert(cur, &i);
5915                 if (error)
5916                         goto del_cursor;
5917                 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5918         }
5919
5920         /*
5921          * Convert to a btree if necessary.
5922          */
5923         if (xfs_bmap_needs_btree(ip, whichfork)) {
5924                 int tmp_logflags; /* partial log flag return val */
5925
5926                 ASSERT(cur == NULL);
5927                 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
5928                                 &cur, 0, &tmp_logflags, whichfork);
5929                 logflags |= tmp_logflags;
5930         }
5931
5932 del_cursor:
5933         if (cur) {
5934                 cur->bc_private.b.allocated = 0;
5935                 xfs_btree_del_cursor(cur,
5936                                 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5937         }
5938
5939         if (logflags)
5940                 xfs_trans_log_inode(tp, ip, logflags);
5941         return error;
5942 }
5943
5944 int
5945 xfs_bmap_split_extent(
5946         struct xfs_inode        *ip,
5947         xfs_fileoff_t           split_fsb)
5948 {
5949         struct xfs_mount        *mp = ip->i_mount;
5950         struct xfs_trans        *tp;
5951         struct xfs_bmap_free    free_list;
5952         xfs_fsblock_t           firstfsb;
5953         int                     committed;
5954         int                     error;
5955
5956         tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
5957         error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
5958                         XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
5959         if (error) {
5960                 xfs_trans_cancel(tp);
5961                 return error;
5962         }
5963
5964         xfs_ilock(ip, XFS_ILOCK_EXCL);
5965         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
5966
5967         xfs_bmap_init(&free_list, &firstfsb);
5968
5969         error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
5970                         &firstfsb, &free_list);
5971         if (error)
5972                 goto out;
5973
5974         error = xfs_bmap_finish(&tp, &free_list, &committed);
5975         if (error)
5976                 goto out;
5977
5978         return xfs_trans_commit(tp);
5979
5980 out:
5981         xfs_bmap_cancel(&free_list);
5982         xfs_trans_cancel(tp);
5983         return error;
5984 }