]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/xfs/xfs_extent_busy.c
Merge tag 'clk-fixes-for-linus' of git://git.linaro.org/people/mturquette/linux
[karo-tx-linux.git] / fs / xfs / xfs_extent_busy.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * Copyright (c) 2010 David Chinner.
4  * Copyright (c) 2011 Christoph Hellwig.
5  * All Rights Reserved.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it would be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write the Free Software Foundation,
18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  */
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_types.h"
23 #include "xfs_log.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_mount.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_alloc.h"
30 #include "xfs_inode.h"
31 #include "xfs_extent_busy.h"
32 #include "xfs_trace.h"
33
34 void
35 xfs_extent_busy_insert(
36         struct xfs_trans        *tp,
37         xfs_agnumber_t          agno,
38         xfs_agblock_t           bno,
39         xfs_extlen_t            len,
40         unsigned int            flags)
41 {
42         struct xfs_extent_busy  *new;
43         struct xfs_extent_busy  *busyp;
44         struct xfs_perag        *pag;
45         struct rb_node          **rbp;
46         struct rb_node          *parent = NULL;
47
48         new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
49         if (!new) {
50                 /*
51                  * No Memory!  Since it is now not possible to track the free
52                  * block, make this a synchronous transaction to insure that
53                  * the block is not reused before this transaction commits.
54                  */
55                 trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
56                 xfs_trans_set_sync(tp);
57                 return;
58         }
59
60         new->agno = agno;
61         new->bno = bno;
62         new->length = len;
63         INIT_LIST_HEAD(&new->list);
64         new->flags = flags;
65
66         /* trace before insert to be able to see failed inserts */
67         trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);
68
69         pag = xfs_perag_get(tp->t_mountp, new->agno);
70         spin_lock(&pag->pagb_lock);
71         rbp = &pag->pagb_tree.rb_node;
72         while (*rbp) {
73                 parent = *rbp;
74                 busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);
75
76                 if (new->bno < busyp->bno) {
77                         rbp = &(*rbp)->rb_left;
78                         ASSERT(new->bno + new->length <= busyp->bno);
79                 } else if (new->bno > busyp->bno) {
80                         rbp = &(*rbp)->rb_right;
81                         ASSERT(bno >= busyp->bno + busyp->length);
82                 } else {
83                         ASSERT(0);
84                 }
85         }
86
87         rb_link_node(&new->rb_node, parent, rbp);
88         rb_insert_color(&new->rb_node, &pag->pagb_tree);
89
90         list_add(&new->list, &tp->t_busy);
91         spin_unlock(&pag->pagb_lock);
92         xfs_perag_put(pag);
93 }
94
95 /*
96  * Search for a busy extent within the range of the extent we are about to
97  * allocate.  You need to be holding the busy extent tree lock when calling
98  * xfs_extent_busy_search(). This function returns 0 for no overlapping busy
99  * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
100  * match. This is done so that a non-zero return indicates an overlap that
101  * will require a synchronous transaction, but it can still be
102  * used to distinguish between a partial or exact match.
103  */
104 int
105 xfs_extent_busy_search(
106         struct xfs_mount        *mp,
107         xfs_agnumber_t          agno,
108         xfs_agblock_t           bno,
109         xfs_extlen_t            len)
110 {
111         struct xfs_perag        *pag;
112         struct rb_node          *rbp;
113         struct xfs_extent_busy  *busyp;
114         int                     match = 0;
115
116         pag = xfs_perag_get(mp, agno);
117         spin_lock(&pag->pagb_lock);
118
119         rbp = pag->pagb_tree.rb_node;
120
121         /* find closest start bno overlap */
122         while (rbp) {
123                 busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
124                 if (bno < busyp->bno) {
125                         /* may overlap, but exact start block is lower */
126                         if (bno + len > busyp->bno)
127                                 match = -1;
128                         rbp = rbp->rb_left;
129                 } else if (bno > busyp->bno) {
130                         /* may overlap, but exact start block is higher */
131                         if (bno < busyp->bno + busyp->length)
132                                 match = -1;
133                         rbp = rbp->rb_right;
134                 } else {
135                         /* bno matches busyp, length determines exact match */
136                         match = (busyp->length == len) ? 1 : -1;
137                         break;
138                 }
139         }
140         spin_unlock(&pag->pagb_lock);
141         xfs_perag_put(pag);
142         return match;
143 }
144
145 /*
146  * The found free extent [fbno, fend] overlaps part or all of the given busy
147  * extent.  If the overlap covers the beginning, the end, or all of the busy
148  * extent, the overlapping portion can be made unbusy and used for the
149  * allocation.  We can't split a busy extent because we can't modify a
150  * transaction/CIL context busy list, but we can update an entries block
151  * number or length.
152  *
153  * Returns true if the extent can safely be reused, or false if the search
154  * needs to be restarted.
155  */
156 STATIC bool
157 xfs_extent_busy_update_extent(
158         struct xfs_mount        *mp,
159         struct xfs_perag        *pag,
160         struct xfs_extent_busy  *busyp,
161         xfs_agblock_t           fbno,
162         xfs_extlen_t            flen,
163         bool                    userdata)
164 {
165         xfs_agblock_t           fend = fbno + flen;
166         xfs_agblock_t           bbno = busyp->bno;
167         xfs_agblock_t           bend = bbno + busyp->length;
168
169         /*
170          * This extent is currently being discarded.  Give the thread
171          * performing the discard a chance to mark the extent unbusy
172          * and retry.
173          */
174         if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
175                 spin_unlock(&pag->pagb_lock);
176                 delay(1);
177                 spin_lock(&pag->pagb_lock);
178                 return false;
179         }
180
181         /*
182          * If there is a busy extent overlapping a user allocation, we have
183          * no choice but to force the log and retry the search.
184          *
185          * Fortunately this does not happen during normal operation, but
186          * only if the filesystem is very low on space and has to dip into
187          * the AGFL for normal allocations.
188          */
189         if (userdata)
190                 goto out_force_log;
191
192         if (bbno < fbno && bend > fend) {
193                 /*
194                  * Case 1:
195                  *    bbno           bend
196                  *    +BBBBBBBBBBBBBBBBB+
197                  *        +---------+
198                  *        fbno   fend
199                  */
200
201                 /*
202                  * We would have to split the busy extent to be able to track
203                  * it correct, which we cannot do because we would have to
204                  * modify the list of busy extents attached to the transaction
205                  * or CIL context, which is immutable.
206                  *
207                  * Force out the log to clear the busy extent and retry the
208                  * search.
209                  */
210                 goto out_force_log;
211         } else if (bbno >= fbno && bend <= fend) {
212                 /*
213                  * Case 2:
214                  *    bbno           bend
215                  *    +BBBBBBBBBBBBBBBBB+
216                  *    +-----------------+
217                  *    fbno           fend
218                  *
219                  * Case 3:
220                  *    bbno           bend
221                  *    +BBBBBBBBBBBBBBBBB+
222                  *    +--------------------------+
223                  *    fbno                    fend
224                  *
225                  * Case 4:
226                  *             bbno           bend
227                  *             +BBBBBBBBBBBBBBBBB+
228                  *    +--------------------------+
229                  *    fbno                    fend
230                  *
231                  * Case 5:
232                  *             bbno           bend
233                  *             +BBBBBBBBBBBBBBBBB+
234                  *    +-----------------------------------+
235                  *    fbno                             fend
236                  *
237                  */
238
239                 /*
240                  * The busy extent is fully covered by the extent we are
241                  * allocating, and can simply be removed from the rbtree.
242                  * However we cannot remove it from the immutable list
243                  * tracking busy extents in the transaction or CIL context,
244                  * so set the length to zero to mark it invalid.
245                  *
246                  * We also need to restart the busy extent search from the
247                  * tree root, because erasing the node can rearrange the
248                  * tree topology.
249                  */
250                 rb_erase(&busyp->rb_node, &pag->pagb_tree);
251                 busyp->length = 0;
252                 return false;
253         } else if (fend < bend) {
254                 /*
255                  * Case 6:
256                  *              bbno           bend
257                  *             +BBBBBBBBBBBBBBBBB+
258                  *             +---------+
259                  *             fbno   fend
260                  *
261                  * Case 7:
262                  *             bbno           bend
263                  *             +BBBBBBBBBBBBBBBBB+
264                  *    +------------------+
265                  *    fbno            fend
266                  *
267                  */
268                 busyp->bno = fend;
269         } else if (bbno < fbno) {
270                 /*
271                  * Case 8:
272                  *    bbno           bend
273                  *    +BBBBBBBBBBBBBBBBB+
274                  *        +-------------+
275                  *        fbno       fend
276                  *
277                  * Case 9:
278                  *    bbno           bend
279                  *    +BBBBBBBBBBBBBBBBB+
280                  *        +----------------------+
281                  *        fbno                fend
282                  */
283                 busyp->length = fbno - busyp->bno;
284         } else {
285                 ASSERT(0);
286         }
287
288         trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
289         return true;
290
291 out_force_log:
292         spin_unlock(&pag->pagb_lock);
293         xfs_log_force(mp, XFS_LOG_SYNC);
294         trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
295         spin_lock(&pag->pagb_lock);
296         return false;
297 }
298
299
300 /*
301  * For a given extent [fbno, flen], make sure we can reuse it safely.
302  */
303 void
304 xfs_extent_busy_reuse(
305         struct xfs_mount        *mp,
306         xfs_agnumber_t          agno,
307         xfs_agblock_t           fbno,
308         xfs_extlen_t            flen,
309         bool                    userdata)
310 {
311         struct xfs_perag        *pag;
312         struct rb_node          *rbp;
313
314         ASSERT(flen > 0);
315
316         pag = xfs_perag_get(mp, agno);
317         spin_lock(&pag->pagb_lock);
318 restart:
319         rbp = pag->pagb_tree.rb_node;
320         while (rbp) {
321                 struct xfs_extent_busy *busyp =
322                         rb_entry(rbp, struct xfs_extent_busy, rb_node);
323                 xfs_agblock_t   bbno = busyp->bno;
324                 xfs_agblock_t   bend = bbno + busyp->length;
325
326                 if (fbno + flen <= bbno) {
327                         rbp = rbp->rb_left;
328                         continue;
329                 } else if (fbno >= bend) {
330                         rbp = rbp->rb_right;
331                         continue;
332                 }
333
334                 if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
335                                                   userdata))
336                         goto restart;
337         }
338         spin_unlock(&pag->pagb_lock);
339         xfs_perag_put(pag);
340 }
341
342 /*
343  * For a given extent [fbno, flen], search the busy extent list to find a
344  * subset of the extent that is not busy.  If *rlen is smaller than
345  * args->minlen no suitable extent could be found, and the higher level
346  * code needs to force out the log and retry the allocation.
347  */
348 void
349 xfs_extent_busy_trim(
350         struct xfs_alloc_arg    *args,
351         xfs_agblock_t           bno,
352         xfs_extlen_t            len,
353         xfs_agblock_t           *rbno,
354         xfs_extlen_t            *rlen)
355 {
356         xfs_agblock_t           fbno;
357         xfs_extlen_t            flen;
358         struct rb_node          *rbp;
359
360         ASSERT(len > 0);
361
362         spin_lock(&args->pag->pagb_lock);
363 restart:
364         fbno = bno;
365         flen = len;
366         rbp = args->pag->pagb_tree.rb_node;
367         while (rbp && flen >= args->minlen) {
368                 struct xfs_extent_busy *busyp =
369                         rb_entry(rbp, struct xfs_extent_busy, rb_node);
370                 xfs_agblock_t   fend = fbno + flen;
371                 xfs_agblock_t   bbno = busyp->bno;
372                 xfs_agblock_t   bend = bbno + busyp->length;
373
374                 if (fend <= bbno) {
375                         rbp = rbp->rb_left;
376                         continue;
377                 } else if (fbno >= bend) {
378                         rbp = rbp->rb_right;
379                         continue;
380                 }
381
382                 /*
383                  * If this is a metadata allocation, try to reuse the busy
384                  * extent instead of trimming the allocation.
385                  */
386                 if (!args->userdata &&
387                     !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
388                         if (!xfs_extent_busy_update_extent(args->mp, args->pag,
389                                                           busyp, fbno, flen,
390                                                           false))
391                                 goto restart;
392                         continue;
393                 }
394
395                 if (bbno <= fbno) {
396                         /* start overlap */
397
398                         /*
399                          * Case 1:
400                          *    bbno           bend
401                          *    +BBBBBBBBBBBBBBBBB+
402                          *        +---------+
403                          *        fbno   fend
404                          *
405                          * Case 2:
406                          *    bbno           bend
407                          *    +BBBBBBBBBBBBBBBBB+
408                          *    +-------------+
409                          *    fbno       fend
410                          *
411                          * Case 3:
412                          *    bbno           bend
413                          *    +BBBBBBBBBBBBBBBBB+
414                          *        +-------------+
415                          *        fbno       fend
416                          *
417                          * Case 4:
418                          *    bbno           bend
419                          *    +BBBBBBBBBBBBBBBBB+
420                          *    +-----------------+
421                          *    fbno           fend
422                          *
423                          * No unbusy region in extent, return failure.
424                          */
425                         if (fend <= bend)
426                                 goto fail;
427
428                         /*
429                          * Case 5:
430                          *    bbno           bend
431                          *    +BBBBBBBBBBBBBBBBB+
432                          *        +----------------------+
433                          *        fbno                fend
434                          *
435                          * Case 6:
436                          *    bbno           bend
437                          *    +BBBBBBBBBBBBBBBBB+
438                          *    +--------------------------+
439                          *    fbno                    fend
440                          *
441                          * Needs to be trimmed to:
442                          *                       +-------+
443                          *                       fbno fend
444                          */
445                         fbno = bend;
446                 } else if (bend >= fend) {
447                         /* end overlap */
448
449                         /*
450                          * Case 7:
451                          *             bbno           bend
452                          *             +BBBBBBBBBBBBBBBBB+
453                          *    +------------------+
454                          *    fbno            fend
455                          *
456                          * Case 8:
457                          *             bbno           bend
458                          *             +BBBBBBBBBBBBBBBBB+
459                          *    +--------------------------+
460                          *    fbno                    fend
461                          *
462                          * Needs to be trimmed to:
463                          *    +-------+
464                          *    fbno fend
465                          */
466                         fend = bbno;
467                 } else {
468                         /* middle overlap */
469
470                         /*
471                          * Case 9:
472                          *             bbno           bend
473                          *             +BBBBBBBBBBBBBBBBB+
474                          *    +-----------------------------------+
475                          *    fbno                             fend
476                          *
477                          * Can be trimmed to:
478                          *    +-------+        OR         +-------+
479                          *    fbno fend                   fbno fend
480                          *
481                          * Backward allocation leads to significant
482                          * fragmentation of directories, which degrades
483                          * directory performance, therefore we always want to
484                          * choose the option that produces forward allocation
485                          * patterns.
486                          * Preferring the lower bno extent will make the next
487                          * request use "fend" as the start of the next
488                          * allocation;  if the segment is no longer busy at
489                          * that point, we'll get a contiguous allocation, but
490                          * even if it is still busy, we will get a forward
491                          * allocation.
492                          * We try to avoid choosing the segment at "bend",
493                          * because that can lead to the next allocation
494                          * taking the segment at "fbno", which would be a
495                          * backward allocation.  We only use the segment at
496                          * "fbno" if it is much larger than the current
497                          * requested size, because in that case there's a
498                          * good chance subsequent allocations will be
499                          * contiguous.
500                          */
501                         if (bbno - fbno >= args->maxlen) {
502                                 /* left candidate fits perfect */
503                                 fend = bbno;
504                         } else if (fend - bend >= args->maxlen * 4) {
505                                 /* right candidate has enough free space */
506                                 fbno = bend;
507                         } else if (bbno - fbno >= args->minlen) {
508                                 /* left candidate fits minimum requirement */
509                                 fend = bbno;
510                         } else {
511                                 goto fail;
512                         }
513                 }
514
515                 flen = fend - fbno;
516         }
517         spin_unlock(&args->pag->pagb_lock);
518
519         if (fbno != bno || flen != len) {
520                 trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
521                                           fbno, flen);
522         }
523         *rbno = fbno;
524         *rlen = flen;
525         return;
526 fail:
527         /*
528          * Return a zero extent length as failure indications.  All callers
529          * re-check if the trimmed extent satisfies the minlen requirement.
530          */
531         spin_unlock(&args->pag->pagb_lock);
532         trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
533         *rbno = fbno;
534         *rlen = 0;
535 }
536
537 STATIC void
538 xfs_extent_busy_clear_one(
539         struct xfs_mount        *mp,
540         struct xfs_perag        *pag,
541         struct xfs_extent_busy  *busyp)
542 {
543         if (busyp->length) {
544                 trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
545                                                 busyp->length);
546                 rb_erase(&busyp->rb_node, &pag->pagb_tree);
547         }
548
549         list_del_init(&busyp->list);
550         kmem_free(busyp);
551 }
552
553 /*
554  * Remove all extents on the passed in list from the busy extents tree.
555  * If do_discard is set skip extents that need to be discarded, and mark
556  * these as undergoing a discard operation instead.
557  */
558 void
559 xfs_extent_busy_clear(
560         struct xfs_mount        *mp,
561         struct list_head        *list,
562         bool                    do_discard)
563 {
564         struct xfs_extent_busy  *busyp, *n;
565         struct xfs_perag        *pag = NULL;
566         xfs_agnumber_t          agno = NULLAGNUMBER;
567
568         list_for_each_entry_safe(busyp, n, list, list) {
569                 if (busyp->agno != agno) {
570                         if (pag) {
571                                 spin_unlock(&pag->pagb_lock);
572                                 xfs_perag_put(pag);
573                         }
574                         pag = xfs_perag_get(mp, busyp->agno);
575                         spin_lock(&pag->pagb_lock);
576                         agno = busyp->agno;
577                 }
578
579                 if (do_discard && busyp->length &&
580                     !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
581                         busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
582                 else
583                         xfs_extent_busy_clear_one(mp, pag, busyp);
584         }
585
586         if (pag) {
587                 spin_unlock(&pag->pagb_lock);
588                 xfs_perag_put(pag);
589         }
590 }
591
592 /*
593  * Callback for list_sort to sort busy extents by the AG they reside in.
594  */
595 int
596 xfs_extent_busy_ag_cmp(
597         void                    *priv,
598         struct list_head        *a,
599         struct list_head        *b)
600 {
601         return container_of(a, struct xfs_extent_busy, list)->agno -
602                 container_of(b, struct xfs_extent_busy, list)->agno;
603 }