~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/xfs/xfs_extent_busy.c

Version: ~ [ linux-5.8-rc3 ] ~ [ linux-5.7.5 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.48 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.129 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.185 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.228 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.228 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.85 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  3  * Copyright (c) 2010 David Chinner.
  4  * Copyright (c) 2011 Christoph Hellwig.
  5  * All Rights Reserved.
  6  *
  7  * This program is free software; you can redistribute it and/or
  8  * modify it under the terms of the GNU General Public License as
  9  * published by the Free Software Foundation.
 10  *
 11  * This program is distributed in the hope that it would be useful,
 12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14  * GNU General Public License for more details.
 15  *
 16  * You should have received a copy of the GNU General Public License
 17  * along with this program; if not, write the Free Software Foundation,
 18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 19  */
 20 #include "xfs.h"
 21 #include "xfs_fs.h"
 22 #include "xfs_format.h"
 23 #include "xfs_log_format.h"
 24 #include "xfs_shared.h"
 25 #include "xfs_trans_resv.h"
 26 #include "xfs_sb.h"
 27 #include "xfs_mount.h"
 28 #include "xfs_alloc.h"
 29 #include "xfs_extent_busy.h"
 30 #include "xfs_trace.h"
 31 #include "xfs_trans.h"
 32 #include "xfs_log.h"
 33 
 34 void
 35 xfs_extent_busy_insert(
 36         struct xfs_trans        *tp,
 37         xfs_agnumber_t          agno,
 38         xfs_agblock_t           bno,
 39         xfs_extlen_t            len,
 40         unsigned int            flags)
 41 {
 42         struct xfs_extent_busy  *new;
 43         struct xfs_extent_busy  *busyp;
 44         struct xfs_perag        *pag;
 45         struct rb_node          **rbp;
 46         struct rb_node          *parent = NULL;
 47 
 48         new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
 49         if (!new) {
 50                 /*
 51                  * No Memory!  Since it is now not possible to track the free
 52                  * block, make this a synchronous transaction to insure that
 53                  * the block is not reused before this transaction commits.
 54                  */
 55                 trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
 56                 xfs_trans_set_sync(tp);
 57                 return;
 58         }
 59 
 60         new->agno = agno;
 61         new->bno = bno;
 62         new->length = len;
 63         INIT_LIST_HEAD(&new->list);
 64         new->flags = flags;
 65 
 66         /* trace before insert to be able to see failed inserts */
 67         trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);
 68 
 69         pag = xfs_perag_get(tp->t_mountp, new->agno);
 70         spin_lock(&pag->pagb_lock);
 71         rbp = &pag->pagb_tree.rb_node;
 72         while (*rbp) {
 73                 parent = *rbp;
 74                 busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);
 75 
 76                 if (new->bno < busyp->bno) {
 77                         rbp = &(*rbp)->rb_left;
 78                         ASSERT(new->bno + new->length <= busyp->bno);
 79                 } else if (new->bno > busyp->bno) {
 80                         rbp = &(*rbp)->rb_right;
 81                         ASSERT(bno >= busyp->bno + busyp->length);
 82                 } else {
 83                         ASSERT(0);
 84                 }
 85         }
 86 
 87         rb_link_node(&new->rb_node, parent, rbp);
 88         rb_insert_color(&new->rb_node, &pag->pagb_tree);
 89 
 90         list_add(&new->list, &tp->t_busy);
 91         spin_unlock(&pag->pagb_lock);
 92         xfs_perag_put(pag);
 93 }
 94 
 95 /*
 96  * Search for a busy extent within the range of the extent we are about to
 97  * allocate.  You need to be holding the busy extent tree lock when calling
 98  * xfs_extent_busy_search(). This function returns 0 for no overlapping busy
 99  * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
100  * match. This is done so that a non-zero return indicates an overlap that
101  * will require a synchronous transaction, but it can still be
102  * used to distinguish between a partial or exact match.
103  */
104 int
105 xfs_extent_busy_search(
106         struct xfs_mount        *mp,
107         xfs_agnumber_t          agno,
108         xfs_agblock_t           bno,
109         xfs_extlen_t            len)
110 {
111         struct xfs_perag        *pag;
112         struct rb_node          *rbp;
113         struct xfs_extent_busy  *busyp;
114         int                     match = 0;
115 
116         pag = xfs_perag_get(mp, agno);
117         spin_lock(&pag->pagb_lock);
118 
119         rbp = pag->pagb_tree.rb_node;
120 
121         /* find closest start bno overlap */
122         while (rbp) {
123                 busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
124                 if (bno < busyp->bno) {
125                         /* may overlap, but exact start block is lower */
126                         if (bno + len > busyp->bno)
127                                 match = -1;
128                         rbp = rbp->rb_left;
129                 } else if (bno > busyp->bno) {
130                         /* may overlap, but exact start block is higher */
131                         if (bno < busyp->bno + busyp->length)
132                                 match = -1;
133                         rbp = rbp->rb_right;
134                 } else {
135                         /* bno matches busyp, length determines exact match */
136                         match = (busyp->length == len) ? 1 : -1;
137                         break;
138                 }
139         }
140         spin_unlock(&pag->pagb_lock);
141         xfs_perag_put(pag);
142         return match;
143 }
144 
145 /*
146  * The found free extent [fbno, fend] overlaps part or all of the given busy
147  * extent.  If the overlap covers the beginning, the end, or all of the busy
148  * extent, the overlapping portion can be made unbusy and used for the
149  * allocation.  We can't split a busy extent because we can't modify a
150  * transaction/CIL context busy list, but we can update an entry's block
151  * number or length.
152  *
153  * Returns true if the extent can safely be reused, or false if the search
154  * needs to be restarted.
155  */
156 STATIC bool
157 xfs_extent_busy_update_extent(
158         struct xfs_mount        *mp,
159         struct xfs_perag        *pag,
160         struct xfs_extent_busy  *busyp,
161         xfs_agblock_t           fbno,
162         xfs_extlen_t            flen,
163         bool                    userdata) __releases(&pag->pagb_lock)
164                                           __acquires(&pag->pagb_lock)
165 {
166         xfs_agblock_t           fend = fbno + flen;
167         xfs_agblock_t           bbno = busyp->bno;
168         xfs_agblock_t           bend = bbno + busyp->length;
169 
170         /*
171          * This extent is currently being discarded.  Give the thread
172          * performing the discard a chance to mark the extent unbusy
173          * and retry.
174          */
175         if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
176                 spin_unlock(&pag->pagb_lock);
177                 delay(1);
178                 spin_lock(&pag->pagb_lock);
179                 return false;
180         }
181 
182         /*
183          * If there is a busy extent overlapping a user allocation, we have
184          * no choice but to force the log and retry the search.
185          *
186          * Fortunately this does not happen during normal operation, but
187          * only if the filesystem is very low on space and has to dip into
188          * the AGFL for normal allocations.
189          */
190         if (userdata)
191                 goto out_force_log;
192 
193         if (bbno < fbno && bend > fend) {
194                 /*
195                  * Case 1:
196                  *    bbno           bend
197                  *    +BBBBBBBBBBBBBBBBB+
198                  *        +---------+
199                  *        fbno   fend
200                  */
201 
202                 /*
203                  * We would have to split the busy extent to be able to track
204                  * it correct, which we cannot do because we would have to
205                  * modify the list of busy extents attached to the transaction
206                  * or CIL context, which is immutable.
207                  *
208                  * Force out the log to clear the busy extent and retry the
209                  * search.
210                  */
211                 goto out_force_log;
212         } else if (bbno >= fbno && bend <= fend) {
213                 /*
214                  * Case 2:
215                  *    bbno           bend
216                  *    +BBBBBBBBBBBBBBBBB+
217                  *    +-----------------+
218                  *    fbno           fend
219                  *
220                  * Case 3:
221                  *    bbno           bend
222                  *    +BBBBBBBBBBBBBBBBB+
223                  *    +--------------------------+
224                  *    fbno                    fend
225                  *
226                  * Case 4:
227                  *             bbno           bend
228                  *             +BBBBBBBBBBBBBBBBB+
229                  *    +--------------------------+
230                  *    fbno                    fend
231                  *
232                  * Case 5:
233                  *             bbno           bend
234                  *             +BBBBBBBBBBBBBBBBB+
235                  *    +-----------------------------------+
236                  *    fbno                             fend
237                  *
238                  */
239 
240                 /*
241                  * The busy extent is fully covered by the extent we are
242                  * allocating, and can simply be removed from the rbtree.
243                  * However we cannot remove it from the immutable list
244                  * tracking busy extents in the transaction or CIL context,
245                  * so set the length to zero to mark it invalid.
246                  *
247                  * We also need to restart the busy extent search from the
248                  * tree root, because erasing the node can rearrange the
249                  * tree topology.
250                  */
251                 rb_erase(&busyp->rb_node, &pag->pagb_tree);
252                 busyp->length = 0;
253                 return false;
254         } else if (fend < bend) {
255                 /*
256                  * Case 6:
257                  *              bbno           bend
258                  *             +BBBBBBBBBBBBBBBBB+
259                  *             +---------+
260                  *             fbno   fend
261                  *
262                  * Case 7:
263                  *             bbno           bend
264                  *             +BBBBBBBBBBBBBBBBB+
265                  *    +------------------+
266                  *    fbno            fend
267                  *
268                  */
269                 busyp->bno = fend;
270         } else if (bbno < fbno) {
271                 /*
272                  * Case 8:
273                  *    bbno           bend
274                  *    +BBBBBBBBBBBBBBBBB+
275                  *        +-------------+
276                  *        fbno       fend
277                  *
278                  * Case 9:
279                  *    bbno           bend
280                  *    +BBBBBBBBBBBBBBBBB+
281                  *        +----------------------+
282                  *        fbno                fend
283                  */
284                 busyp->length = fbno - busyp->bno;
285         } else {
286                 ASSERT(0);
287         }
288 
289         trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
290         return true;
291 
292 out_force_log:
293         spin_unlock(&pag->pagb_lock);
294         xfs_log_force(mp, XFS_LOG_SYNC);
295         trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
296         spin_lock(&pag->pagb_lock);
297         return false;
298 }
299 
300 
301 /*
302  * For a given extent [fbno, flen], make sure we can reuse it safely.
303  */
304 void
305 xfs_extent_busy_reuse(
306         struct xfs_mount        *mp,
307         xfs_agnumber_t          agno,
308         xfs_agblock_t           fbno,
309         xfs_extlen_t            flen,
310         bool                    userdata)
311 {
312         struct xfs_perag        *pag;
313         struct rb_node          *rbp;
314 
315         ASSERT(flen > 0);
316 
317         pag = xfs_perag_get(mp, agno);
318         spin_lock(&pag->pagb_lock);
319 restart:
320         rbp = pag->pagb_tree.rb_node;
321         while (rbp) {
322                 struct xfs_extent_busy *busyp =
323                         rb_entry(rbp, struct xfs_extent_busy, rb_node);
324                 xfs_agblock_t   bbno = busyp->bno;
325                 xfs_agblock_t   bend = bbno + busyp->length;
326 
327                 if (fbno + flen <= bbno) {
328                         rbp = rbp->rb_left;
329                         continue;
330                 } else if (fbno >= bend) {
331                         rbp = rbp->rb_right;
332                         continue;
333                 }
334 
335                 if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
336                                                   userdata))
337                         goto restart;
338         }
339         spin_unlock(&pag->pagb_lock);
340         xfs_perag_put(pag);
341 }
342 
343 /*
344  * For a given extent [fbno, flen], search the busy extent list to find a
345  * subset of the extent that is not busy.  If *rlen is smaller than
346  * args->minlen no suitable extent could be found, and the higher level
347  * code needs to force out the log and retry the allocation.
348  */
349 void
350 xfs_extent_busy_trim(
351         struct xfs_alloc_arg    *args,
352         xfs_agblock_t           bno,
353         xfs_extlen_t            len,
354         xfs_agblock_t           *rbno,
355         xfs_extlen_t            *rlen)
356 {
357         xfs_agblock_t           fbno;
358         xfs_extlen_t            flen;
359         struct rb_node          *rbp;
360 
361         ASSERT(len > 0);
362 
363         spin_lock(&args->pag->pagb_lock);
364 restart:
365         fbno = bno;
366         flen = len;
367         rbp = args->pag->pagb_tree.rb_node;
368         while (rbp && flen >= args->minlen) {
369                 struct xfs_extent_busy *busyp =
370                         rb_entry(rbp, struct xfs_extent_busy, rb_node);
371                 xfs_agblock_t   fend = fbno + flen;
372                 xfs_agblock_t   bbno = busyp->bno;
373                 xfs_agblock_t   bend = bbno + busyp->length;
374 
375                 if (fend <= bbno) {
376                         rbp = rbp->rb_left;
377                         continue;
378                 } else if (fbno >= bend) {
379                         rbp = rbp->rb_right;
380                         continue;
381                 }
382 
383                 /*
384                  * If this is a metadata allocation, try to reuse the busy
385                  * extent instead of trimming the allocation.
386                  */
387                 if (!args->userdata &&
388                     !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
389                         if (!xfs_extent_busy_update_extent(args->mp, args->pag,
390                                                           busyp, fbno, flen,
391                                                           false))
392                                 goto restart;
393                         continue;
394                 }
395 
396                 if (bbno <= fbno) {
397                         /* start overlap */
398 
399                         /*
400                          * Case 1:
401                          *    bbno           bend
402                          *    +BBBBBBBBBBBBBBBBB+
403                          *        +---------+
404                          *        fbno   fend
405                          *
406                          * Case 2:
407                          *    bbno           bend
408                          *    +BBBBBBBBBBBBBBBBB+
409                          *    +-------------+
410                          *    fbno       fend
411                          *
412                          * Case 3:
413                          *    bbno           bend
414                          *    +BBBBBBBBBBBBBBBBB+
415                          *        +-------------+
416                          *        fbno       fend
417                          *
418                          * Case 4:
419                          *    bbno           bend
420                          *    +BBBBBBBBBBBBBBBBB+
421                          *    +-----------------+
422                          *    fbno           fend
423                          *
424                          * No unbusy region in extent, return failure.
425                          */
426                         if (fend <= bend)
427                                 goto fail;
428 
429                         /*
430                          * Case 5:
431                          *    bbno           bend
432                          *    +BBBBBBBBBBBBBBBBB+
433                          *        +----------------------+
434                          *        fbno                fend
435                          *
436                          * Case 6:
437                          *    bbno           bend
438                          *    +BBBBBBBBBBBBBBBBB+
439                          *    +--------------------------+
440                          *    fbno                    fend
441                          *
442                          * Needs to be trimmed to:
443                          *                       +-------+
444                          *                       fbno fend
445                          */
446                         fbno = bend;
447                 } else if (bend >= fend) {
448                         /* end overlap */
449 
450                         /*
451                          * Case 7:
452                          *             bbno           bend
453                          *             +BBBBBBBBBBBBBBBBB+
454                          *    +------------------+
455                          *    fbno            fend
456                          *
457                          * Case 8:
458                          *             bbno           bend
459                          *             +BBBBBBBBBBBBBBBBB+
460                          *    +--------------------------+
461                          *    fbno                    fend
462                          *
463                          * Needs to be trimmed to:
464                          *    +-------+
465                          *    fbno fend
466                          */
467                         fend = bbno;
468                 } else {
469                         /* middle overlap */
470 
471                         /*
472                          * Case 9:
473                          *             bbno           bend
474                          *             +BBBBBBBBBBBBBBBBB+
475                          *    +-----------------------------------+
476                          *    fbno                             fend
477                          *
478                          * Can be trimmed to:
479                          *    +-------+        OR         +-------+
480                          *    fbno fend                   fbno fend
481                          *
482                          * Backward allocation leads to significant
483                          * fragmentation of directories, which degrades
484                          * directory performance, therefore we always want to
485                          * choose the option that produces forward allocation
486                          * patterns.
487                          * Preferring the lower bno extent will make the next
488                          * request use "fend" as the start of the next
489                          * allocation;  if the segment is no longer busy at
490                          * that point, we'll get a contiguous allocation, but
491                          * even if it is still busy, we will get a forward
492                          * allocation.
493                          * We try to avoid choosing the segment at "bend",
494                          * because that can lead to the next allocation
495                          * taking the segment at "fbno", which would be a
496                          * backward allocation.  We only use the segment at
497                          * "fbno" if it is much larger than the current
498                          * requested size, because in that case there's a
499                          * good chance subsequent allocations will be
500                          * contiguous.
501                          */
502                         if (bbno - fbno >= args->maxlen) {
503                                 /* left candidate fits perfect */
504                                 fend = bbno;
505                         } else if (fend - bend >= args->maxlen * 4) {
506                                 /* right candidate has enough free space */
507                                 fbno = bend;
508                         } else if (bbno - fbno >= args->minlen) {
509                                 /* left candidate fits minimum requirement */
510                                 fend = bbno;
511                         } else {
512                                 goto fail;
513                         }
514                 }
515 
516                 flen = fend - fbno;
517         }
518         spin_unlock(&args->pag->pagb_lock);
519 
520         if (fbno != bno || flen != len) {
521                 trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
522                                           fbno, flen);
523         }
524         *rbno = fbno;
525         *rlen = flen;
526         return;
527 fail:
528         /*
529          * Return a zero extent length as failure indications.  All callers
530          * re-check if the trimmed extent satisfies the minlen requirement.
531          */
532         spin_unlock(&args->pag->pagb_lock);
533         trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
534         *rbno = fbno;
535         *rlen = 0;
536 }
537 
538 STATIC void
539 xfs_extent_busy_clear_one(
540         struct xfs_mount        *mp,
541         struct xfs_perag        *pag,
542         struct xfs_extent_busy  *busyp)
543 {
544         if (busyp->length) {
545                 trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
546                                                 busyp->length);
547                 rb_erase(&busyp->rb_node, &pag->pagb_tree);
548         }
549 
550         list_del_init(&busyp->list);
551         kmem_free(busyp);
552 }
553 
554 /*
555  * Remove all extents on the passed in list from the busy extents tree.
556  * If do_discard is set skip extents that need to be discarded, and mark
557  * these as undergoing a discard operation instead.
558  */
559 void
560 xfs_extent_busy_clear(
561         struct xfs_mount        *mp,
562         struct list_head        *list,
563         bool                    do_discard)
564 {
565         struct xfs_extent_busy  *busyp, *n;
566         struct xfs_perag        *pag = NULL;
567         xfs_agnumber_t          agno = NULLAGNUMBER;
568 
569         list_for_each_entry_safe(busyp, n, list, list) {
570                 if (busyp->agno != agno) {
571                         if (pag) {
572                                 spin_unlock(&pag->pagb_lock);
573                                 xfs_perag_put(pag);
574                         }
575                         pag = xfs_perag_get(mp, busyp->agno);
576                         spin_lock(&pag->pagb_lock);
577                         agno = busyp->agno;
578                 }
579 
580                 if (do_discard && busyp->length &&
581                     !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
582                         busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
583                 else
584                         xfs_extent_busy_clear_one(mp, pag, busyp);
585         }
586 
587         if (pag) {
588                 spin_unlock(&pag->pagb_lock);
589                 xfs_perag_put(pag);
590         }
591 }
592 
593 /*
594  * Callback for list_sort to sort busy extents by the AG they reside in.
595  */
596 int
597 xfs_extent_busy_ag_cmp(
598         void                    *priv,
599         struct list_head        *a,
600         struct list_head        *b)
601 {
602         return container_of(a, struct xfs_extent_busy, list)->agno -
603                 container_of(b, struct xfs_extent_busy, list)->agno;
604 }
605 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp