TOMOYO Linux Cross Reference
Linux/fs/btrfs/raid56.c

  1 /*
  2  * Copyright (C) 2012 Fusion-io  All rights reserved.
  3  * Copyright (C) 2012 Intel Corp. All rights reserved.
  4  *
  5  * This program is free software; you can redistribute it and/or
  6  * modify it under the terms of the GNU General Public
  7  * License v2 as published by the Free Software Foundation.
  8  *
  9  * This program is distributed in the hope that it will be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 12  * General Public License for more details.
 13  *
 14  * You should have received a copy of the GNU General Public
 15  * License along with this program; if not, write to the
 16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 17  * Boston, MA 02111-1307, USA.
 18  */
 19 #include <linux/sched.h>
 20 #include <linux/wait.h>
 21 #include <linux/bio.h>
 22 #include <linux/slab.h>
 23 #include <linux/buffer_head.h>
 24 #include <linux/blkdev.h>
 25 #include <linux/random.h>
 26 #include <linux/iocontext.h>
 27 #include <linux/capability.h>
 28 #include <linux/ratelimit.h>
 29 #include <linux/kthread.h>
 30 #include <linux/raid/pq.h>
 31 #include <linux/hash.h>
 32 #include <linux/list_sort.h>
 33 #include <linux/raid/xor.h>
 34 #include <linux/vmalloc.h>
 35 #include <asm/div64.h>
 36 #include "ctree.h"
 37 #include "extent_map.h"
 38 #include "disk-io.h"
 39 #include "transaction.h"
 40 #include "print-tree.h"
 41 #include "volumes.h"
 42 #include "raid56.h"
 43 #include "async-thread.h"
 44 #include "check-integrity.h"
 45 #include "rcu-string.h"
 46 
 47 /* set when additional merges to this rbio are not allowed */
 48 #define RBIO_RMW_LOCKED_BIT     1
 49 
 50 /*
 51  * set when this rbio is sitting in the hash, but it is just a cache
 52  * of past RMW
 53  */
 54 #define RBIO_CACHE_BIT          2
 55 
 56 /*
 57  * set when it is safe to trust the stripe_pages for caching
 58  */
 59 #define RBIO_CACHE_READY_BIT    3
 60 
 61 
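    /*
     * upper bound on the number of rbios kept on the LRU stripe cache
     * list.  cache_rbio() prunes the least recently used entry once the
     * cache grows past this
     */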
 62 #define RBIO_CACHE_SIZE 1024
 63 
 64 struct btrfs_raid_bio {
 65         struct btrfs_fs_info *fs_info;
 66         struct btrfs_bio *bbio;
 67 
 68         /*
 69          * logical block numbers for the start of each stripe
 70          * The last one or two are p/q.  These are sorted,
 71          * so raid_map[0] is the start of our full stripe
 72          */
 73         u64 *raid_map;
 74 
 75         /* while we're doing rmw on a stripe
 76          * we put it into a hash table so we can
 77          * lock the stripe and merge more rbios
 78          * into it.
 79          */
 80         struct list_head hash_list;
 81 
 82         /*
 83          * LRU list for the stripe cache
 84          */
 85         struct list_head stripe_cache;
 86 
 87         /*
 88          * for scheduling work in the helper threads
 89          */
 90         struct btrfs_work work;
 91 
 92         /*
 93          * bio list and bio_list_lock are used
 94          * to add more bios into the stripe
 95          * in hopes of avoiding the full rmw
 96          */
 97         struct bio_list bio_list;
 98         spinlock_t bio_list_lock;
 99 
100         /* also protected by the bio_list_lock, the
101          * plug list is used by the plugging code
102          * to collect partial bios while plugged.  The
103          * stripe locking code also uses it to hand off
104          * the stripe lock to the next pending IO
105          */
106         struct list_head plug_list;
107 
108         /*
109          * flags that tell us if it is safe to
110          * merge with this bio
111          */
112         unsigned long flags;
113 
114         /* size of each individual stripe on disk */
115         int stripe_len;
116 
117         /* number of data stripes (no p/q) */
118         int nr_data;
119 
120         /*
121          * set if we're doing a parity rebuild
122          * for a read from higher up, which is handled
123          * differently from a parity rebuild as part of
124          * rmw
125          */
126         int read_rebuild;
127 
128         /* first bad stripe */
129         int faila;
130 
131         /* second bad stripe (for raid6 use) */
132         int failb;
133 
134         /*
135          * number of pages needed to represent the full
136          * stripe
137          */
138         int nr_pages;
139 
140         /*
141          * size of all the bios in the bio_list.  This
142          * helps us decide if the rbio maps to a full
143          * stripe or not
144          */
145         int bio_list_bytes;
146 
147         atomic_t refs;
148 
149         /*
150          * these are two arrays of pointers.  We allocate the
151  * rbio big enough to hold them both and set up their
152          * locations when the rbio is allocated
153          */
154 
155         /* pointers to pages that we allocated for
156          * reading/writing stripes directly from the disk (including P/Q)
157          */
158         struct page **stripe_pages;
159 
160         /*
161          * pointers to the pages in the bio_list.  Stored
162          * here for faster lookup
163          */
164         struct page **bio_pages;
165 };
166 
167 static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
168 static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
169 static void rmw_work(struct btrfs_work *work);
170 static void read_rebuild_work(struct btrfs_work *work);
171 static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
172 static void async_read_rebuild(struct btrfs_raid_bio *rbio);
173 static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
174 static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
175 static void __free_raid_bio(struct btrfs_raid_bio *rbio);
176 static void index_rbio_pages(struct btrfs_raid_bio *rbio);
177 static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
178 
179 /*
180  * the stripe hash table is used for locking, and to collect
181  * bios in hopes of making a full stripe
182  */
183 int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
184 {
185         struct btrfs_stripe_hash_table *table;
186         struct btrfs_stripe_hash_table *x;
187         struct btrfs_stripe_hash *cur;
188         struct btrfs_stripe_hash *h;
189         int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
190         int i;
191         int table_size;
192 
193         if (info->stripe_hash_table)
194                 return 0;
195 
196         /*
197  * The table is large, starting at order 4, and can go as high as
198  * order 7 when lock debugging is turned on.
199          *
200          * Try harder to allocate and fallback to vmalloc to lower the chance
201          * of a failing mount.
202          */
203         table_size = sizeof(*table) + sizeof(*h) * num_entries;
204         table = kzalloc(table_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
205         if (!table) {
206                 table = vzalloc(table_size);
207                 if (!table)
208                         return -ENOMEM;
209         }
210 
211         spin_lock_init(&table->cache_lock);
212         INIT_LIST_HEAD(&table->stripe_cache);
213 
214         h = table->table;
215 
216         for (i = 0; i < num_entries; i++) {
217                 cur = h + i;
218                 INIT_LIST_HEAD(&cur->hash_list);
219                 spin_lock_init(&cur->lock);
220                 init_waitqueue_head(&cur->wait);
221         }
222 
223         x = cmpxchg(&info->stripe_hash_table, NULL, table);
224         if (x) {
225                 if (is_vmalloc_addr(x))
226                         vfree(x);
227                 else
228                         kfree(x);
229         }
230         return 0;
231 }
232 
233 /*
234  * caching an rbio means to copy anything from the
235  * bio_pages array into the stripe_pages array.  We
236  * use the page uptodate bit in the stripe cache array
237  * to indicate if it has valid data
238  *
239  * once the caching is done, we set the cache ready
240  * bit.
241  */
242 static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
243 {
244         int i;
245         char *s;
246         char *d;
247         int ret;
248 
249         ret = alloc_rbio_pages(rbio);
250         if (ret)
251                 return;
252 
253         for (i = 0; i < rbio->nr_pages; i++) {
254                 if (!rbio->bio_pages[i])
255                         continue;
256 
257                 s = kmap(rbio->bio_pages[i]);
258                 d = kmap(rbio->stripe_pages[i]);
259 
260                 memcpy(d, s, PAGE_CACHE_SIZE);
261 
262                 kunmap(rbio->bio_pages[i]);
263                 kunmap(rbio->stripe_pages[i]);
264                 SetPageUptodate(rbio->stripe_pages[i]);
265         }
266         set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
267 }
268 
269 /*
270  * we hash on the first logical address of the stripe
271  */
272 static int rbio_bucket(struct btrfs_raid_bio *rbio)
273 {
274         u64 num = rbio->raid_map[0];
275 
276         /*
277          * we shift down quite a bit.  We're using byte
278          * addressing, and most of the lower bits are zeros.
279          * This tends to upset hash_64, and it consistently
280          * returns just one or two different values.
281          *
282          * shifting off the lower bits fixes things.
283          */
284         return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
285 }
286 
287 /*
288  * stealing an rbio means taking all the uptodate pages from the stripe
289  * array in the source rbio and putting them into the destination rbio
290  */
291 static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
292 {
293         int i;
294         struct page *s;
295         struct page *d;
296 
297         if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
298                 return;
299 
300         for (i = 0; i < dest->nr_pages; i++) {
301                 s = src->stripe_pages[i];
302                 if (!s || !PageUptodate(s)) {
303                         continue;
304                 }
305 
306                 d = dest->stripe_pages[i];
307                 if (d)
308                         __free_page(d);
309 
310                 dest->stripe_pages[i] = s;
311                 src->stripe_pages[i] = NULL;
312         }
313 }
314 
315 /*
316  * merging means we take the bio_list from the victim and
317  * splice it into the destination.  The victim should
318  * be discarded afterwards.
319  *
320  * must be called with dest->bio_list_lock held
321  */
322 static void merge_rbio(struct btrfs_raid_bio *dest,
323                        struct btrfs_raid_bio *victim)
324 {
325         bio_list_merge(&dest->bio_list, &victim->bio_list);
326         dest->bio_list_bytes += victim->bio_list_bytes;
327         bio_list_init(&victim->bio_list);
328 }
329 
330 /*
331  * used to prune items that are in the cache.  The caller
332  * must hold the hash table lock.
333  */
334 static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
335 {
336         int bucket = rbio_bucket(rbio);
337         struct btrfs_stripe_hash_table *table;
338         struct btrfs_stripe_hash *h;
339         int freeit = 0;
340 
341         /*
342          * check the bit again under the hash table lock.
343          */
344         if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
345                 return;
346 
347         table = rbio->fs_info->stripe_hash_table;
348         h = table->table + bucket;
349 
350         /* hold the lock for the bucket because we may be
351          * removing it from the hash table
352          */
353         spin_lock(&h->lock);
354 
355         /*
356          * hold the lock for the bio list because we need
357          * to make sure the bio list is empty
358          */
359         spin_lock(&rbio->bio_list_lock);
360 
361         if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
362                 list_del_init(&rbio->stripe_cache);
363                 table->cache_size -= 1;
364                 freeit = 1;
365 
366                 /* if the bio list isn't empty, this rbio is
367                  * still involved in an IO.  We take it out
368                  * of the cache list, and drop the ref that
369                  * was held for the list.
370                  *
371                  * If the bio_list was empty, we also remove
372                  * the rbio from the hash_table, and drop
373                  * the corresponding ref
374                  */
375                 if (bio_list_empty(&rbio->bio_list)) {
376                         if (!list_empty(&rbio->hash_list)) {
377                                 list_del_init(&rbio->hash_list);
378                                 atomic_dec(&rbio->refs);
379                                 BUG_ON(!list_empty(&rbio->plug_list));
380                         }
381                 }
382         }
383 
384         spin_unlock(&rbio->bio_list_lock);
385         spin_unlock(&h->lock);
386 
387         if (freeit)
388                 __free_raid_bio(rbio);
389 }
390 
391 /*
392  * prune a given rbio from the cache
393  */
394 static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
395 {
396         struct btrfs_stripe_hash_table *table;
397         unsigned long flags;
398 
399         if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
400                 return;
401 
402         table = rbio->fs_info->stripe_hash_table;
403 
404         spin_lock_irqsave(&table->cache_lock, flags);
405         __remove_rbio_from_cache(rbio);
406         spin_unlock_irqrestore(&table->cache_lock, flags);
407 }
408 
409 /*
410  * remove everything in the cache
411  */
412 static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
413 {
414         struct btrfs_stripe_hash_table *table;
415         unsigned long flags;
416         struct btrfs_raid_bio *rbio;
417 
418         table = info->stripe_hash_table;
419 
420         spin_lock_irqsave(&table->cache_lock, flags);
421         while (!list_empty(&table->stripe_cache)) {
422                 rbio = list_entry(table->stripe_cache.next,
423                                   struct btrfs_raid_bio,
424                                   stripe_cache);
425                 __remove_rbio_from_cache(rbio);
426         }
427         spin_unlock_irqrestore(&table->cache_lock, flags);
428 }
429 
430 /*
431  * remove all cached entries and free the hash table
432  * used by unmount
433  */
434 void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
435 {
436         if (!info->stripe_hash_table)
437                 return;
438         btrfs_clear_rbio_cache(info);
439         if (is_vmalloc_addr(info->stripe_hash_table))
440                 vfree(info->stripe_hash_table);
441         else
442                 kfree(info->stripe_hash_table);
443         info->stripe_hash_table = NULL;
444 }
445 
446 /*
447  * insert an rbio into the stripe cache.  It
448  * must have already been prepared by calling
449  * cache_rbio_pages
450  *
451  * If this rbio was already cached, it gets
452  * moved to the front of the lru.
453  *
454  * If the size of the rbio cache is too big, we
455  * prune an item.
456  */
457 static void cache_rbio(struct btrfs_raid_bio *rbio)
458 {
459         struct btrfs_stripe_hash_table *table;
460         unsigned long flags;
461 
462         if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
463                 return;
464 
465         table = rbio->fs_info->stripe_hash_table;
466 
467         spin_lock_irqsave(&table->cache_lock, flags);
468         spin_lock(&rbio->bio_list_lock);
469 
470         /* bump our ref if we were not in the list before */
471         if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
472                 atomic_inc(&rbio->refs);
473 
474         if (!list_empty(&rbio->stripe_cache)) {
475                 list_move(&rbio->stripe_cache, &table->stripe_cache);
476         } else {
477                 list_add(&rbio->stripe_cache, &table->stripe_cache);
478                 table->cache_size += 1;
479         }
480 
481         spin_unlock(&rbio->bio_list_lock);
482 
483         if (table->cache_size > RBIO_CACHE_SIZE) {
484                 struct btrfs_raid_bio *found;
485 
486                 found = list_entry(table->stripe_cache.prev,
487                                   struct btrfs_raid_bio,
488                                   stripe_cache);
489 
490                 if (found != rbio)
491                         __remove_rbio_from_cache(found);
492         }
493 
494         spin_unlock_irqrestore(&table->cache_lock, flags);
495         return;
496 }
497 
498 /*
499  * helper function to run the xor_blocks api.  It is only
500  * able to do MAX_XOR_BLOCKS at a time, so we need to
501  * loop through.
502  */
503 static void run_xor(void **pages, int src_cnt, ssize_t len)
504 {
505         int src_off = 0;
506         int xor_src_cnt = 0;
507         void *dest = pages[src_cnt];
508 
509         while (src_cnt > 0) {
510                 xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
511                 xor_blocks(xor_src_cnt, len, dest, pages + src_off);
512 
513                 src_cnt -= xor_src_cnt;
514                 src_off += xor_src_cnt;
515         }
516 }
517 
518 /*
519  * returns true if the bio list inside this rbio
520  * covers an entire stripe (no rmw required).
521  * Must be called with the bio list lock held, or
522  * at a time when you know it is impossible to add
523  * new bios into the list
524  */
525 static int __rbio_is_full(struct btrfs_raid_bio *rbio)
526 {
527         unsigned long size = rbio->bio_list_bytes;
528         int ret = 1;
529 
530         if (size != rbio->nr_data * rbio->stripe_len)
531                 ret = 0;
532 
533         BUG_ON(size > rbio->nr_data * rbio->stripe_len);
534         return ret;
535 }
536 
537 static int rbio_is_full(struct btrfs_raid_bio *rbio)
538 {
539         unsigned long flags;
540         int ret;
541 
542         spin_lock_irqsave(&rbio->bio_list_lock, flags);
543         ret = __rbio_is_full(rbio);
544         spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
545         return ret;
546 }
547 
548 /*
549  * returns 1 if it is safe to merge two rbios together.
550  * The merging is safe if the two rbios correspond to
551  * the same stripe and if they are both going in the same
552  * direction (read vs write), and if neither one is
553  * locked for final IO
554  *
555  * The caller is responsible for locking such that
556  * rmw_locked is safe to test
557  */
558 static int rbio_can_merge(struct btrfs_raid_bio *last,
559                           struct btrfs_raid_bio *cur)
560 {
561         if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
562             test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
563                 return 0;
564 
565         /*
566          * we can't merge with cached rbios, since the
567          * idea is that when we merge the destination
568          * rbio is going to run our IO for us.  We can
569          * steal from cached rbio's though, other functions
570          * handle that.
571          */
572         if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
573             test_bit(RBIO_CACHE_BIT, &cur->flags))
574                 return 0;
575 
576         if (last->raid_map[0] !=
577             cur->raid_map[0])
578                 return 0;
579 
580         /* reads can't merge with writes */
581         if (last->read_rebuild !=
582             cur->read_rebuild) {
583                 return 0;
584         }
585 
586         return 1;
587 }
588 
589 /*
590  * helper to index into the pstripe
591  */
592 static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
593 {
594         index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
595         return rbio->stripe_pages[index];
596 }
597 
598 /*
599  * helper to index into the qstripe, returns null
600  * if there is no qstripe
601  */
602 static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
603 {
604         if (rbio->nr_data + 1 == rbio->bbio->num_stripes)
605                 return NULL;
606 
607         index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
608                 PAGE_CACHE_SHIFT;
609         return rbio->stripe_pages[index];
610 }
611 
612 /*
613  * The first stripe in the table for a logical address
614  * has the lock.  rbios are added in one of three ways:
615  *
616  * 1) Nobody has the stripe locked yet.  The rbio is given
617  * the lock and 0 is returned.  The caller must start the IO
618  * themselves.
619  *
620  * 2) Someone has the stripe locked, but we're able to merge
621  * with the lock owner.  The rbio is freed and the IO will
622  * start automatically along with the existing rbio.  1 is returned.
623  *
624  * 3) Someone has the stripe locked, but we're not able to merge.
625  * The rbio is added to the lock owner's plug list, or merged into
626  * an rbio already on the plug list.  When the lock owner unlocks,
627  * the next rbio on the list is run and the IO is started automatically.
629  * 1 is returned.
629  *
630  * If we return 0, the caller still owns the rbio and must continue with
631  * IO submission.  If we return 1, the caller must assume the rbio has
632  * already been freed.
633  */
634 static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
635 {
636         int bucket = rbio_bucket(rbio);
637         struct btrfs_stripe_hash *h = rbio->fs_info->stripe_hash_table->table + bucket;
638         struct btrfs_raid_bio *cur;
639         struct btrfs_raid_bio *pending;
640         unsigned long flags;
641         DEFINE_WAIT(wait);
642         struct btrfs_raid_bio *freeit = NULL;
643         struct btrfs_raid_bio *cache_drop = NULL;
644         int ret = 0;
645         int walk = 0;
646 
647         spin_lock_irqsave(&h->lock, flags);
648         list_for_each_entry(cur, &h->hash_list, hash_list) {
649                 walk++;
650                 if (cur->raid_map[0] == rbio->raid_map[0]) {
651                         spin_lock(&cur->bio_list_lock);
652 
653                         /* can we steal this cached rbio's pages? */
654                         if (bio_list_empty(&cur->bio_list) &&
655                             list_empty(&cur->plug_list) &&
656                             test_bit(RBIO_CACHE_BIT, &cur->flags) &&
657                             !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
658                                 list_del_init(&cur->hash_list);
659                                 atomic_dec(&cur->refs);
660 
661                                 steal_rbio(cur, rbio);
662                                 cache_drop = cur;
663                                 spin_unlock(&cur->bio_list_lock);
664 
665                                 goto lockit;
666                         }
667 
668                         /* can we merge into the lock owner? */
669                         if (rbio_can_merge(cur, rbio)) {
670                                 merge_rbio(cur, rbio);
671                                 spin_unlock(&cur->bio_list_lock);
672                                 freeit = rbio;
673                                 ret = 1;
674                                 goto out;
675                         }
676 
677 
678                         /*
679                          * we couldn't merge with the running
680                          * rbio, see if we can merge with the
681                          * pending ones.  We don't have to
682                          * check for rmw_locked because there
683                          * is no way they are inside finish_rmw
684                          * right now
685                          */
686                         list_for_each_entry(pending, &cur->plug_list,
687                                             plug_list) {
688                                 if (rbio_can_merge(pending, rbio)) {
689                                         merge_rbio(pending, rbio);
690                                         spin_unlock(&cur->bio_list_lock);
691                                         freeit = rbio;
692                                         ret = 1;
693                                         goto out;
694                                 }
695                         }
696 
697                         /* no merging, put us on the tail of the plug list,
698                          * our rbio will be started when the currently
699                          * running rbio unlocks
700                          */
701                         list_add_tail(&rbio->plug_list, &cur->plug_list);
702                         spin_unlock(&cur->bio_list_lock);
703                         ret = 1;
704                         goto out;
705                 }
706         }
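    /*
     * nothing in the bucket matched us (or we stole the cached rbio
     * above), so take the stripe lock by adding ourselves to the
     * bucket's hash list
     */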
707 lockit:
708         atomic_inc(&rbio->refs);
709         list_add(&rbio->hash_list, &h->hash_list);
710 out:
711         spin_unlock_irqrestore(&h->lock, flags);
712         if (cache_drop)
713                 remove_rbio_from_cache(cache_drop);
714         if (freeit)
715                 __free_raid_bio(freeit);
716         return ret;
717 }
718 
719 /*
720  * called as rmw or parity rebuild is completed.  If the plug list has more
721  * rbios waiting for this stripe, the next one on the list will be started
722  */
723 static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
724 {
725         int bucket;
726         struct btrfs_stripe_hash *h;
727         unsigned long flags;
728         int keep_cache = 0;
729 
730         bucket = rbio_bucket(rbio);
731         h = rbio->fs_info->stripe_hash_table->table + bucket;
732 
733         if (list_empty(&rbio->plug_list))
734                 cache_rbio(rbio);
735 
736         spin_lock_irqsave(&h->lock, flags);
737         spin_lock(&rbio->bio_list_lock);
738 
739         if (!list_empty(&rbio->hash_list)) {
740                 /*
741                  * if we're still cached and there is no other IO
742                  * to perform, just leave this rbio here for others
743                  * to steal from later
744                  */
745                 if (list_empty(&rbio->plug_list) &&
746                     test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
747                         keep_cache = 1;
748                         clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
749                         BUG_ON(!bio_list_empty(&rbio->bio_list));
750                         goto done;
751                 }
752 
753                 list_del_init(&rbio->hash_list);
754                 atomic_dec(&rbio->refs);
755 
756                 /*
757                  * we use the plug list to hold all the rbios
758                  * waiting for the chance to lock this stripe.
759                  * hand the lock over to one of them.
760                  */
761                 if (!list_empty(&rbio->plug_list)) {
762                         struct btrfs_raid_bio *next;
763                         struct list_head *head = rbio->plug_list.next;
764 
765                         next = list_entry(head, struct btrfs_raid_bio,
766                                           plug_list);
767 
768                         list_del_init(&rbio->plug_list);
769 
770                         list_add(&next->hash_list, &h->hash_list);
771                         atomic_inc(&next->refs);
772                         spin_unlock(&rbio->bio_list_lock);
773                         spin_unlock_irqrestore(&h->lock, flags);
774 
775                         if (next->read_rebuild)
776                                 async_read_rebuild(next);
777                         else {
778                                 steal_rbio(rbio, next);
779                                 async_rmw_stripe(next);
780                         }
781 
782                         goto done_nolock;
783                 } else if (waitqueue_active(&h->wait)) {
784                         spin_unlock(&rbio->bio_list_lock);
785                         spin_unlock_irqrestore(&h->lock, flags);
786                         wake_up(&h->wait);
787                         goto done_nolock;
788                 }
789         }
790 done:
791         spin_unlock(&rbio->bio_list_lock);
792         spin_unlock_irqrestore(&h->lock, flags);
793 
794 done_nolock:
795         if (!keep_cache)
796                 remove_rbio_from_cache(rbio);
797 }
798 
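    /*
     * drop a reference on the rbio.  Once the last reference is gone,
     * the stripe pages, the raid_map, the bbio and the rbio itself are
     * all freed
     */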
799 static void __free_raid_bio(struct btrfs_raid_bio *rbio)
800 {
801         int i;
802 
803         WARN_ON(atomic_read(&rbio->refs) < 0);
804         if (!atomic_dec_and_test(&rbio->refs))
805                 return;
806 
807         WARN_ON(!list_empty(&rbio->stripe_cache));
808         WARN_ON(!list_empty(&rbio->hash_list));
809         WARN_ON(!bio_list_empty(&rbio->bio_list));
810 
811         for (i = 0; i < rbio->nr_pages; i++) {
812                 if (rbio->stripe_pages[i]) {
813                         __free_page(rbio->stripe_pages[i]);
814                         rbio->stripe_pages[i] = NULL;
815                 }
816         }
817         kfree(rbio->raid_map);
818         kfree(rbio->bbio);
819         kfree(rbio);
820 }
821 
822 static void free_raid_bio(struct btrfs_raid_bio *rbio)
823 {
824         unlock_stripe(rbio);
825         __free_raid_bio(rbio);
826 }
827 
828 /*
829  * this frees the rbio, then walks all the bios in the
830  * bio_list and calls end_io on them
831  */
832 static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
833 {
834         struct bio *cur = bio_list_get(&rbio->bio_list);
835         struct bio *next;
836         free_raid_bio(rbio);
837 
838         while (cur) {
839                 next = cur->bi_next;
840                 cur->bi_next = NULL;
841                 if (uptodate)
842                         set_bit(BIO_UPTODATE, &cur->bi_flags);
843                 bio_endio(cur, err);
844                 cur = next;
845         }
846 }
847 
848 /*
849  * end io function used by finish_rmw.  When we finally
850  * get here, we've written a full stripe
851  */
852 static void raid_write_end_io(struct bio *bio, int err)
853 {
854         struct btrfs_raid_bio *rbio = bio->bi_private;
855 
856         if (err)
857                 fail_bio_stripe(rbio, bio);
858 
859         bio_put(bio);
860 
861         if (!atomic_dec_and_test(&rbio->bbio->stripes_pending))
862                 return;
863 
864         err = 0;
865 
866         /* OK, we have written all the stripes we need to. */
867         if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors)
868                 err = -EIO;
869 
870         rbio_orig_end_io(rbio, err, 0);
871         return;
872 }
873 
874 /*
875  * the read/modify/write code wants to use the original bio for
876  * any pages it included, and then use the rbio for everything
877  * else.  This function decides if a given index (stripe number)
878  * and page number in that stripe fall inside the original bio
879  * or the rbio.
880  *
881  * if you set bio_list_only, you'll get a NULL back for any ranges
882  * that are outside the bio_list
883  *
884  * This doesn't take any refs on anything, you get a bare page pointer
885  * and the caller must bump refs as required.
886  *
887  * You must call index_rbio_pages once before you can trust
888  * the answers from this function.
889  */
890 static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
891                                  int index, int pagenr, int bio_list_only)
892 {
893         int chunk_page;
894         struct page *p = NULL;
895 
896         chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;
897 
898         spin_lock_irq(&rbio->bio_list_lock);
899         p = rbio->bio_pages[chunk_page];
900         spin_unlock_irq(&rbio->bio_list_lock);
901 
902         if (p || bio_list_only)
903                 return p;
904 
905         return rbio->stripe_pages[chunk_page];
906 }
907 
908 /*
909  * number of pages we need for the entire stripe across all the
910  * drives
911  */
912 static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
913 {
914         unsigned long nr = stripe_len * nr_stripes;
915         return (nr + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
916 }
917 
918 /*
919  * allocation and initial setup for the btrfs_raid_bio.  Note that
920  * this does not allocate any pages for rbio->pages.
921  */
922 static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
923                           struct btrfs_bio *bbio, u64 *raid_map,
924                           u64 stripe_len)
925 {
926         struct btrfs_raid_bio *rbio;
927         int nr_data = 0;
928         int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes);
929         void *p;
930 
931         rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2,
932                         GFP_NOFS);
933         if (!rbio) {
934                 kfree(raid_map);
935                 kfree(bbio);
936                 return ERR_PTR(-ENOMEM);
937         }
938 
939         bio_list_init(&rbio->bio_list);
940         INIT_LIST_HEAD(&rbio->plug_list);
941         spin_lock_init(&rbio->bio_list_lock);
942         INIT_LIST_HEAD(&rbio->stripe_cache);
943         INIT_LIST_HEAD(&rbio->hash_list);
944         rbio->bbio = bbio;
945         rbio->raid_map = raid_map;
946         rbio->fs_info = root->fs_info;
947         rbio->stripe_len = stripe_len;
948         rbio->nr_pages = num_pages;
949         rbio->faila = -1;
950         rbio->failb = -1;
951         atomic_set(&rbio->refs, 1);
952 
953         /*
954          * the stripe_pages and bio_pages array point to the extra
955          * memory we allocated past the end of the rbio
956          */
957         p = rbio + 1;
958         rbio->stripe_pages = p;
959         rbio->bio_pages = p + sizeof(struct page *) * num_pages;
960 
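            /*
             * the last slot of a raid6 raid_map is tagged RAID6_Q_STRIPE,
             * which tells us whether one or two parity stripes follow
             * the data
             */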
961         if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
962                 nr_data = bbio->num_stripes - 2;
963         else
964                 nr_data = bbio->num_stripes - 1;
965 
966         rbio->nr_data = nr_data;
967         return rbio;
968 }
969 
970 /* allocate pages for all the stripes in the bio, including parity */
971 static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
972 {
973         int i;
974         struct page *page;
975 
976         for (i = 0; i < rbio->nr_pages; i++) {
977                 if (rbio->stripe_pages[i])
978                         continue;
979                 page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
980                 if (!page)
981                         return -ENOMEM;
982                 rbio->stripe_pages[i] = page;
983                 ClearPageUptodate(page);
984         }
985         return 0;
986 }
987 
988 /* allocate pages for just the p/q stripes */
989 static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
990 {
991         int i;
992         struct page *page;
993 
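            /* skip past the data stripe pages; the p/q pages start here */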
994         i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
995 
996         for (; i < rbio->nr_pages; i++) {
997                 if (rbio->stripe_pages[i])
998                         continue;
999                 page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1000                 if (!page)
1001                         return -ENOMEM;
1002                 rbio->stripe_pages[i] = page;
1003         }
1004         return 0;
1005 }
1006 
1007 /*
1008  * add a single page from a specific stripe into our list of bios for IO
1009  * this will try to merge into existing bios if possible, and return
1010  * zero if all went well.
1011  */
1012 static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1013                             struct bio_list *bio_list,
1014                             struct page *page,
1015                             int stripe_nr,
1016                             unsigned long page_index,
1017                             unsigned long bio_max_len)
1018 {
1019         struct bio *last = bio_list->tail;
1020         u64 last_end = 0;
1021         int ret;
1022         struct bio *bio;
1023         struct btrfs_bio_stripe *stripe;
1024         u64 disk_start;
1025 
1026         stripe = &rbio->bbio->stripes[stripe_nr];
1027         disk_start = stripe->physical + (page_index << PAGE_CACHE_SHIFT);
1028 
1029         /* if the device is missing, just fail this stripe */
1030         if (!stripe->dev->bdev)
1031                 return fail_rbio_index(rbio, stripe_nr);
1032 
1033         /* see if we can add this page onto our existing bio */
1034         if (last) {
1035                 last_end = (u64)last->bi_sector << 9;
1036                 last_end += last->bi_size;
1037 
1038                 /*
1039                  * we can't merge these if they are from different
1040                  * devices or if they are not contiguous
1041                  */
1042                 if (last_end == disk_start && stripe->dev->bdev &&
1043                     test_bit(BIO_UPTODATE, &last->bi_flags) &&
1044                     last->bi_bdev == stripe->dev->bdev) {
1045                         ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0);
1046                         if (ret == PAGE_CACHE_SIZE)
1047                                 return 0;
1048                 }
1049         }
1050 
1051         /* put a new bio on the list */
1052         bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT ?: 1);
1053         if (!bio)
1054                 return -ENOMEM;
1055 
1056         bio->bi_size = 0;
1057         bio->bi_bdev = stripe->dev->bdev;
1058         bio->bi_sector = disk_start >> 9;
1059         set_bit(BIO_UPTODATE, &bio->bi_flags);
1060 
1061         bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
1062         bio_list_add(bio_list, bio);
1063         return 0;
1064 }
1065 
1066 /*
1067  * while we're doing the read/modify/write cycle, we could
1068  * have errors in reading pages off the disk.  This checks
1069  * for errors and if we're not able to read the page it'll
1070  * trigger parity reconstruction.  The rmw will be finished
1071  * after we've reconstructed the failed stripes
1072  */
1073 static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
1074 {
1075         if (rbio->faila >= 0 || rbio->failb >= 0) {
1076                 BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1);
1077                 __raid56_parity_recover(rbio);
1078         } else {
1079                 finish_rmw(rbio);
1080         }
1081 }
1082 
1083 /*
1084  * these are just the pages from the rbio array, not from anything
1085  * the FS sent down to us
1086  */
1087 static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
1088 {
1089         int index;
1090         index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
1091         index += page;
1092         return rbio->stripe_pages[index];
1093 }
1094 
1095 /*
1096  * helper function to walk our bio list and populate the bio_pages array with
1097  * the result.  This seems expensive, but it is faster than constantly
1098  * searching through the bio list as we set up the IO in finish_rmw or stripe
1099  * reconstruction.
1100  *
1101  * This must be called before you trust the answers from page_in_rbio
1102  */
1103 static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1104 {
1105         struct bio *bio;
1106         u64 start;
1107         unsigned long stripe_offset;
1108         unsigned long page_index;
1109         struct page *p;
1110         int i;
1111 
1112         spin_lock_irq(&rbio->bio_list_lock);
1113         bio_list_for_each(bio, &rbio->bio_list) {
1114                 start = (u64)bio->bi_sector << 9;
1115                 stripe_offset = start - rbio->raid_map[0];
1116                 page_index = stripe_offset >> PAGE_CACHE_SHIFT;
1117 
1118                 for (i = 0; i < bio->bi_vcnt; i++) {
1119                         p = bio->bi_io_vec[i].bv_page;
1120                         rbio->bio_pages[page_index + i] = p;
1121                 }
1122         }
1123         spin_unlock_irq(&rbio->bio_list_lock);
1124 }
1125 
1126 /*
1127  * this is called from one of two situations.  We either
1128  * have a full stripe from the higher layers, or we've read all
1129  * the missing bits off disk.
1130  *
1131  * This will calculate the parity and then send down any
1132  * changed blocks.
1133  */
1134 static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1135 {
1136         struct btrfs_bio *bbio = rbio->bbio;
1137         void *pointers[bbio->num_stripes];
1138         int stripe_len = rbio->stripe_len;
1139         int nr_data = rbio->nr_data;
1140         int stripe;
1141         int pagenr;
1142         int p_stripe = -1;
1143         int q_stripe = -1;
1144         struct bio_list bio_list;
1145         struct bio *bio;
1146         int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
1147         int ret;
1148 
1149         bio_list_init(&bio_list);
1150 
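             /*
              * one stripe beyond the data means raid5 (P only),
              * two means raid6 (P and Q)
              */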
1151         if (bbio->num_stripes - rbio->nr_data == 1) {
1152                 p_stripe = bbio->num_stripes - 1;
1153         } else if (bbio->num_stripes - rbio->nr_data == 2) {
1154                 p_stripe = bbio->num_stripes - 2;
1155                 q_stripe = bbio->num_stripes - 1;
1156         } else {
1157                 BUG();
1158         }
1159 
1160         /* at this point we either have a full stripe,
1161          * or we've read the full stripe from the drive.
1162          * recalculate the parity and write the new results.
1163          *
1164          * We're not allowed to add any new bios to the
1165          * bio list here, anyone else that wants to
1166          * change this stripe needs to do their own rmw.
1167          */
1168         spin_lock_irq(&rbio->bio_list_lock);
1169         set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1170         spin_unlock_irq(&rbio->bio_list_lock);
1171 
1172         atomic_set(&rbio->bbio->error, 0);
1173 
1174         /*
1175          * now that we've set rmw_locked, run through the
1176          * bio list one last time and map the page pointers
1177          *
1178          * We don't cache full rbios because we're assuming
1179          * the higher layers are unlikely to use this area of
1180          * the disk again soon.  If they do use it again,
1181          * hopefully they will send another full bio.
1182          */
1183         index_rbio_pages(rbio);
1184         if (!rbio_is_full(rbio))
1185                 cache_rbio_pages(rbio);
1186         else
1187                 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
1188 
1189         for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
1190                 struct page *p;
1191                 /* first collect one page from each data stripe */
1192                 for (stripe = 0; stripe < nr_data; stripe++) {
1193                         p = page_in_rbio(rbio, stripe, pagenr, 0);
1194                         pointers[stripe] = kmap(p);
1195                 }
1196 
1197                 /* then add the parity stripe */
1198                 p = rbio_pstripe_page(rbio, pagenr);
1199                 SetPageUptodate(p);
1200                 pointers[stripe++] = kmap(p);
1201 
1202                 if (q_stripe != -1) {
1203 
1204                         /*
1205                          * raid6, add the qstripe and call the
1206                          * library function to fill in our p/q
1207                          */
1208                         p = rbio_qstripe_page(rbio, pagenr);
1209                         SetPageUptodate(p);
1210                         pointers[stripe++] = kmap(p);
1211 
1212                         raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE,
1213                                                 pointers);
1214                 } else {
1215                         /* raid5 */
1216                         memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
1217                         run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
1218                 }
1219 
1220 
1221                 for (stripe = 0; stripe < bbio->num_stripes; stripe++)
1222                         kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
1223         }
1224 
1225         /*
1226          * time to start writing.  Make bios for everything from the
1227          * higher layers (the bio_list in our rbio) and our p/q.  Ignore
1228          * everything else.
1229          */
1230         for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
1231                 for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
1232                         struct page *page;
1233                         if (stripe < rbio->nr_data) {
1234                                 page = page_in_rbio(rbio, stripe, pagenr, 1);
1235                                 if (!page)
1236                                         continue;
1237                         } else {
1238                                page = rbio_stripe_page(rbio, stripe, pagenr);
1239                         }
1240 
1241                         ret = rbio_add_io_page(rbio, &bio_list,
1242                                        page, stripe, pagenr, rbio->stripe_len);
1243                         if (ret)
1244                                 goto cleanup;
1245                 }
1246         }
1247 
1248         atomic_set(&bbio->stripes_pending, bio_list_size(&bio_list));
1249         BUG_ON(atomic_read(&bbio->stripes_pending) == 0);
1250 
1251         while (1) {
1252                 bio = bio_list_pop(&bio_list);
1253                 if (!bio)
1254                         break;
1255 
1256                 bio->bi_private = rbio;
1257                 bio->bi_end_io = raid_write_end_io;
1258                 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
1259                 submit_bio(WRITE, bio);
1260         }
1261         return;
1262 
1263 cleanup:
1264         rbio_orig_end_io(rbio, -EIO, 0);
1265 }
1266 
1267 /*
1268  * helper to find the stripe number for a given bio.  Used to figure out which
1269  * stripe has failed.  This expects the bio to correspond to a physical disk,
1270  * so it looks up based on physical sector numbers.
1271  */
1272 static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1273                            struct bio *bio)
1274 {
1275         u64 physical = bio->bi_sector;
1276         u64 stripe_start;
1277         int i;
1278         struct btrfs_bio_stripe *stripe;
1279 
1280         physical <<= 9;
1281 
1282         for (i = 0; i < rbio->bbio->num_stripes; i++) {
1283                 stripe = &rbio->bbio->stripes[i];
1284                 stripe_start = stripe->physical;
1285                 if (physical >= stripe_start &&
1286                     physical < stripe_start + rbio->stripe_len) {
1287                         return i;
1288                 }
1289         }
1290         return -1;
1291 }
1292 
1293 /*
1294  * helper to find the stripe number for a given
1295  * bio (before mapping).  Used to figure out which stripe has
1296  * failed.  This looks up based on logical block numbers.
1297  */
1298 static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
1299                                    struct bio *bio)
1300 {
1301         u64 logical = bio->bi_sector;
1302         u64 stripe_start;
1303         int i;
1304 
1305         logical <<= 9;
1306 
1307         for (i = 0; i < rbio->nr_data; i++) {
1308                 stripe_start = rbio->raid_map[i];
1309                 if (logical >= stripe_start &&
1310                     logical < stripe_start + rbio->stripe_len) {
1311                         return i;
1312                 }
1313         }
1314         return -1;
1315 }
1316 
1317 /*
1318  * returns -EIO if we had too many failures
1319  */
1320 static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
1321 {
1322         unsigned long flags;
1323         int ret = 0;
1324 
1325         spin_lock_irqsave(&rbio->bio_list_lock, flags);
1326 
1327         /* we already know this stripe is bad, move on */
1328         if (rbio->faila == failed || rbio->failb == failed)
1329                 goto out;
1330 
1331         if (rbio->faila == -1) {
1332                 /* first failure on this rbio */
1333                 rbio->faila = failed;
1334                 atomic_inc(&rbio->bbio->error);
1335         } else if (rbio->failb == -1) {
1336                 /* second failure on this rbio */
1337                 rbio->failb = failed;
1338                 atomic_inc(&rbio->bbio->error);
1339         } else {
1340                 ret = -EIO;
1341         }
1342 out:
1343         spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
1344 
1345         return ret;
1346 }
1347 
1348 /*
1349  * helper to fail a stripe based on a physical disk
1350  * bio.
1351  */
1352 static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
1353                            struct bio *bio)
1354 {
1355         int failed = find_bio_stripe(rbio, bio);
1356 
1357         if (failed < 0)
1358                 return -EIO;
1359 
1360         return fail_rbio_index(rbio, failed);
1361 }
1362 
1363 /*
1364  * this sets each page in the bio uptodate.  It should only be used on private
1365  * rbio pages, nothing that comes in from the higher layers
1366  */
1367 static void set_bio_pages_uptodate(struct bio *bio)
1368 {
1369         int i;
1370         struct page *p;
1371 
1372         for (i = 0; i < bio->bi_vcnt; i++) {
1373                 p = bio->bi_io_vec[i].bv_page;
1374                 SetPageUptodate(p);
1375         }
1376 }
1377 
1378 /*
1379  * end io for the read phase of the rmw cycle.  All the bios here are physical
1380  * stripe bios we've read from the disk so we can recalculate the parity of the
1381  * stripe.
1382  *
1383  * This will usually kick off finish_rmw once all the bios are read in, but it
1384  * may trigger parity reconstruction if we had any errors along the way
1385  */
1386 static void raid_rmw_end_io(struct bio *bio, int err)
1387 {
1388         struct btrfs_raid_bio *rbio = bio->bi_private;
1389 
1390         if (err)
1391                 fail_bio_stripe(rbio, bio);
1392         else
1393                 set_bio_pages_uptodate(bio);
1394 
1395         bio_put(bio);
1396 
1397         if (!atomic_dec_and_test(&rbio->bbio->stripes_pending))
1398                 return;
1399 
1400         err = 0;
1401         if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors)
1402                 goto cleanup;
1403 
1404         /*
1405          * this will normally call finish_rmw to start our write
1406          * but if there are any failed stripes we'll reconstruct
1407          * from parity first
1408          */
1409         validate_rbio_for_rmw(rbio);
1410         return;
1411 
1412 cleanup:
1413 
1414         rbio_orig_end_io(rbio, -EIO, 0);
1415 }
1416 
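     /* defer the rmw for this rbio to the rmw_workers helper threads */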
1417 static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1418 {
1419         rbio->work.flags = 0;
1420         rbio->work.func = rmw_work;
1421 
1422         btrfs_queue_worker(&rbio->fs_info->rmw_workers,
1423                            &rbio->work);
1424 }
1425 
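     /* defer the read rebuild for this rbio to the rmw_workers helper threads */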
1426 static void async_read_rebuild(struct btrfs_raid_bio *rbio)
1427 {
1428         rbio->work.flags = 0;
1429         rbio->work.func = read_rebuild_work;
1430 
1431         btrfs_queue_worker(&rbio->fs_info->rmw_workers,
1432                            &rbio->work);
1433 }
1434 
1435 /*
1436  * the stripe must be locked by the caller.  It will be
1437  * unlocked after all the writes are done
1438  */
1439 static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1440 {
1441         int bios_to_read = 0;
1442         struct btrfs_bio *bbio = rbio->bbio;
1443         struct bio_list bio_list;
1444         int ret;
1445         int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1446         int pagenr;
1447         int stripe;
1448         struct bio *bio;
1449 
1450         bio_list_init(&bio_list);
1451 
1452         ret = alloc_rbio_pages(rbio);
1453         if (ret)
1454                 goto cleanup;
1455 
1456         index_rbio_pages(rbio);
1457 
1458         atomic_set(&rbio->bbio->error, 0);
1459         /*
1460          * build a list of bios to read all the missing parts of this
1461          * stripe
1462          */
1463         for (stripe = 0; stripe < rbio->nr_data; stripe++) {
1464                 for (pagenr = 0; pagenr < nr_pages; pagenr++) {
1465                         struct page *page;
1466                         /*
1467                          * we want to find all the pages missing from
1468                          * the rbio and read them from the disk.  If
1469                          * page_in_rbio finds a page in the bio list
1470                          * we don't need to read it off the stripe.
1471                          */
1472                         page = page_in_rbio(rbio, stripe, pagenr, 1);
1473                         if (page)
1474                                 continue;
1475 
1476                         page = rbio_stripe_page(rbio, stripe, pagenr);
1477                         /*
1478                          * the bio cache may have handed us an uptodate
1479                          * page.  If so, be happy and use it
1480                          */
1481                         if (PageUptodate(page))
1482                                 continue;
1483 
1484                         ret = rbio_add_io_page(rbio, &bio_list, page,
1485                                        stripe, pagenr, rbio->stripe_len);
1486                         if (ret)
1487                                 goto cleanup;
1488                 }
1489         }
1490 
1491         bios_to_read = bio_list_size(&bio_list);
1492         if (!bios_to_read) {
1493                 /*
1494                  * this can happen if others have merged with
1495                  * us; it means there is nothing left to read.
1496                  * But if there are missing devices it may not be
1497                  * safe to do the full stripe write yet.
1498                  */
1499                 goto finish;
1500         }
1501 
1502         /*
1503          * the bbio may be freed once we submit the last bio.  Make sure
1504          * not to touch it after that
1505          */
1506         atomic_set(&bbio->stripes_pending, bios_to_read);
1507         while (1) {
1508                 bio = bio_list_pop(&bio_list);
1509                 if (!bio)
1510                         break;
1511 
1512                 bio->bi_private = rbio;
1513                 bio->bi_end_io = raid_rmw_end_io;
1514 
1515                 btrfs_bio_wq_end_io(rbio->fs_info, bio,
1516                                     BTRFS_WQ_ENDIO_RAID56);
1517 
1518                 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
1519                 submit_bio(READ, bio);
1520         }
1521         /* the actual write will happen once the reads are done */
1522         return 0;
1523 
1524 cleanup:
1525         rbio_orig_end_io(rbio, -EIO, 0);
1526         return -EIO;
1527 
1528 finish:
1529         validate_rbio_for_rmw(rbio);
1530         return 0;
1531 }
1532 
1533 /*
1534  * if the upper layers pass in a full stripe, we thank them by only allocating
1535  * enough pages to hold the parity, and sending it all down quickly.
1536  */
1537 static int full_stripe_write(struct btrfs_raid_bio *rbio)
1538 {
1539         int ret;
1540 
1541         ret = alloc_rbio_parity_pages(rbio);
1542         if (ret) {
1543                 __free_raid_bio(rbio);
1544                 return ret;
1545         }
1546 
1547         ret = lock_stripe_add(rbio);
1548         if (ret == 0)
1549                 finish_rmw(rbio);
1550         return 0;
1551 }
1552 
1553 /*
1554  * partial stripe writes get handed over to async helpers.
1555  * We're really hoping to merge a few more writes into this
1556  * rbio before calculating new parity
1557  */
1558 static int partial_stripe_write(struct btrfs_raid_bio *rbio)
1559 {
1560         int ret;
1561 
1562         ret = lock_stripe_add(rbio);
1563         if (ret == 0)
1564                 async_rmw_stripe(rbio);
1565         return 0;
1566 }
1567 
1568 /*
1569  * sometimes while we were reading from the drive to
1570  * recalculate parity, enough new bios come in to create
1571  * a full stripe.  So we do a check here to see if we can
1572  * go directly to finish_rmw
1573  */
1574 static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
1575 {
1576         /* head off into rmw land if we don't have a full stripe */
1577         if (!rbio_is_full(rbio))
1578                 return partial_stripe_write(rbio);
1579         return full_stripe_write(rbio);
1580 }
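/*
 * Illustrative summary (not part of raid56.c): in terms of the helpers in
 * this file, the write dispatch above reduces to:
 *
 *   rbio_is_full()   lock_stripe_add() == 0   result
 *   --------------   ----------------------   ------------------------------------
 *   yes              yes                      finish_rmw() right away
 *   no               yes                      async_rmw_stripe(), hoping to merge
 *   either           no                       queued behind the current lock holder
 */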
1581 
1582 /*
1583  * We use plugging call backs to collect full stripes.
1584  * Any time we get a partial stripe write while plugged
1585  * we collect it into a list.  When the unplug comes down,
1586  * we sort the list by logical block number and merge
1587  * everything we can into the same rbios
1588  */
1589 struct btrfs_plug_cb {
1590         struct blk_plug_cb cb;
1591         struct btrfs_fs_info *info;
1592         struct list_head rbio_list;
1593         struct btrfs_work work;
1594 };
1595 
1596 /*
1597  * rbios on the plug list are sorted for easier merging.
1598  */
1599 static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
1600 {
1601         struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
1602                                                  plug_list);
1603         struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
1604                                                  plug_list);
1605         u64 a_sector = ra->bio_list.head->bi_sector;
1606         u64 b_sector = rb->bio_list.head->bi_sector;
1607 
1608         if (a_sector < b_sector)
1609                 return -1;
1610         if (a_sector > b_sector)
1611                 return 1;
1612         return 0;
1613 }
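/*
 * Illustrative note (not part of raid56.c): plug_cmp follows the usual
 * list_sort() comparator contract (return negative, zero or positive) and
 * keys on the starting sector of the first bio each rbio carries.  After
 * sorting, rbios that touch the same or neighbouring full stripes end up
 * adjacent in the list, which is what lets run_plug() below merge them in
 * a single pass.
 */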
1614 
1615 static void run_plug(struct btrfs_plug_cb *plug)
1616 {
1617         struct btrfs_raid_bio *cur;
1618         struct btrfs_raid_bio *last = NULL;
1619 
1620         /*
1621          * sort our plug list then try to merge
1622          * everything we can in hopes of creating full
1623          * stripes.
1624          */
1625         list_sort(NULL, &plug->rbio_list, plug_cmp);
1626         while (!list_empty(&plug->rbio_list)) {
1627                 cur = list_entry(plug->rbio_list.next,
1628                                  struct btrfs_raid_bio, plug_list);
1629                 list_del_init(&cur->plug_list);
1630 
1631                 if (rbio_is_full(cur)) {
1632                         /* we have a full stripe, send it down */
1633                         full_stripe_write(cur);
1634                         continue;
1635                 }
1636                 if (last) {
1637                         if (rbio_can_merge(last, cur)) {
1638                                 merge_rbio(last, cur);
1639                                 __free_raid_bio(cur);
1640                                 continue;
1641 
1642                         }
1643                         __raid56_parity_write(last);
1644                 }
1645                 last = cur;
1646         }
1647         if (last) {
1648                 __raid56_parity_write(last);
1649         }
1650         kfree(plug);
1651 }
1652 
1653 /*
1654  * if the unplug comes from schedule, we have to push the
1655  * work off to a helper thread
1656  */
1657 static void unplug_work(struct btrfs_work *work)
1658 {
1659         struct btrfs_plug_cb *plug;
1660         plug = container_of(work, struct btrfs_plug_cb, work);
1661         run_plug(plug);
1662 }
1663 
1664 static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1665 {
1666         struct btrfs_plug_cb *plug;
1667         plug = container_of(cb, struct btrfs_plug_cb, cb);
1668 
1669         if (from_schedule) {
1670                 plug->work.flags = 0;
1671                 plug->work.func = unplug_work;
1672                 btrfs_queue_worker(&plug->info->rmw_workers,
1673                                    &plug->work);
1674                 return;
1675         }
1676         run_plug(plug);
1677 }
1678 
1679 /*
1680  * our main entry point for writes from the rest of the FS.
1681  */
1682 int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1683                         struct btrfs_bio *bbio, u64 *raid_map,
1684                         u64 stripe_len)
1685 {
1686         struct btrfs_raid_bio *rbio;
1687         struct btrfs_plug_cb *plug = NULL;
1688         struct blk_plug_cb *cb;
1689 
1690         rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
1691         if (IS_ERR(rbio))
1692                 return PTR_ERR(rbio);
1693         bio_list_add(&rbio->bio_list, bio);
1694         rbio->bio_list_bytes = bio->bi_size;
1695 
1696         /*
1697          * don't plug on full rbios, just get them out the door
1698          * as quickly as we can
1699          */
1700         if (rbio_is_full(rbio))
1701                 return full_stripe_write(rbio);
1702 
1703         cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info,
1704                                sizeof(*plug));
1705         if (cb) {
1706                 plug = container_of(cb, struct btrfs_plug_cb, cb);
1707                 if (!plug->info) {
1708                         plug->info = root->fs_info;
1709                         INIT_LIST_HEAD(&plug->rbio_list);
1710                 }
1711                 list_add_tail(&rbio->plug_list, &plug->rbio_list);
1712         } else {
1713                 return __raid56_parity_write(rbio);
1714         }
1715         return 0;
1716 }
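/*
 * Illustrative sketch (not part of raid56.c): how a caller benefits from
 * the plugging machinery above.  The function name is made up and error
 * handling is omitted.  Every raid56_parity_write() issued inside the plug
 * window that is not already a full stripe lands on the plug's rbio_list;
 * btrfs_raid_unplug() then runs once at blk_finish_plug() (or earlier if
 * the task schedules), giving run_plug() a chance to sort and merge the
 * partial stripe writes it collected.
 */
static void example_plugged_writes(struct btrfs_root *root, struct bio **bios,
                                   struct btrfs_bio **bbios, u64 **raid_maps,
                                   u64 stripe_len, int nr)
{
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);
        for (i = 0; i < nr; i++)
                raid56_parity_write(root, bios[i], bbios[i], raid_maps[i],
                                    stripe_len);
        blk_finish_plug(&plug);
}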
1717 
1718 /*
1719  * all parity reconstruction happens here.  We've read in everything
1720  * we can find from the drives and this does the heavy lifting of
1721  * sorting the good from the bad.
1722  */
1723 static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1724 {
1725         int pagenr, stripe;
1726         void **pointers;
1727         int faila = -1, failb = -1;
1728         int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1729         struct page *page;
1730         int err;
1731         int i;
1732 
1733         pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *),
1734                            GFP_NOFS);
1735         if (!pointers) {
1736                 err = -ENOMEM;
1737                 goto cleanup_io;
1738         }
1739 
1740         faila = rbio->faila;
1741         failb = rbio->failb;
1742 
1743         if (rbio->read_rebuild) {
1744                 spin_lock_irq(&rbio->bio_list_lock);
1745                 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1746                 spin_unlock_irq(&rbio->bio_list_lock);
1747         }
1748 
1749         index_rbio_pages(rbio);
1750 
1751         for (pagenr = 0; pagenr < nr_pages; pagenr++) {
1752                 /* setup our array of pointers with pages
1753                  * from each stripe
1754                  */
1755                 for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
1756                         /*
1757                          * if we're rebuilding a read, we have to use
1758                          * pages from the bio list
1759                          */
1760                         if (rbio->read_rebuild &&
1761                             (stripe == faila || stripe == failb)) {
1762                                 page = page_in_rbio(rbio, stripe, pagenr, 0);
1763                         } else {
1764                                 page = rbio_stripe_page(rbio, stripe, pagenr);
1765                         }
1766                         pointers[stripe] = kmap(page);
1767                 }
1768 
1769                 /* all raid6 handling here */
1770                 if (rbio->raid_map[rbio->bbio->num_stripes - 1] ==
1771                     RAID6_Q_STRIPE) {
1772 
1773                         /*
1774                          * single failure, rebuild from parity raid5
1775                          * style
1776                          */
1777                         if (failb < 0) {
1778                                 if (faila == rbio->nr_data) {
1779                                         /*
1780                                          * Just the P stripe has failed, without
1781                                          * a bad data or Q stripe.
1782                                          * TODO, we should redo the xor here.
1783                                          */
1784                                         err = -EIO;
1785                                         goto cleanup;
1786                                 }
1787                                 /*
1788                                  * a single failure in raid6 is rebuilt
1789                                  * in the pstripe code below
1790                                  */
1791                                 goto pstripe;
1792                         }
1793 
1794                         /* make sure our ps and qs are in order */
1795                         if (faila > failb) {
1796                                 int tmp = failb;
1797                                 failb = faila;
1798                                 faila = tmp;
1799                         }
1800 
1801                         /* if the Q stripe has failed, do a pstripe reconstruction
1802                          * from the xors.
1803                          * If both the Q stripe and the P stripe have failed, we're
1804                          * here due to a crc mismatch and we can't give them the
1805                          * data they want
1806                          */
1807                         if (rbio->raid_map[failb] == RAID6_Q_STRIPE) {
1808                                 if (rbio->raid_map[faila] == RAID5_P_STRIPE) {
1809                                         err = -EIO;
1810                                         goto cleanup;
1811                                 }
1812                                 /*
1813                                  * otherwise we have one bad data stripe and
1814                                  * a good P stripe.  raid5!
1815                                  */
1816                                 goto pstripe;
1817                         }
1818 
1819                         if (rbio->raid_map[failb] == RAID5_P_STRIPE) {
1820                                 raid6_datap_recov(rbio->bbio->num_stripes,
1821                                                   PAGE_SIZE, faila, pointers);
1822                         } else {
1823                                 raid6_2data_recov(rbio->bbio->num_stripes,
1824                                                   PAGE_SIZE, faila, failb,
1825                                                   pointers);
1826                         }
1827                 } else {
1828                         void *p;
1829 
1830                         /* rebuild from P stripe here (raid5 or raid6) */
1831                         BUG_ON(failb != -1);
1832 pstripe:
1833                         /* Copy parity block into failed block to start with */
1834                         memcpy(pointers[faila],
1835                                pointers[rbio->nr_data],
1836                                PAGE_CACHE_SIZE);
1837 
1838                         /* rearrange the pointer array */
1839                         p = pointers[faila];
1840                         for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
1841                                 pointers[stripe] = pointers[stripe + 1];
1842                         pointers[rbio->nr_data - 1] = p;
1843 
1844                         /* xor in the rest */
1845                         run_xor(pointers, rbio->nr_data - 1, PAGE_CACHE_SIZE);
1846                 }
1847                 /* if we're doing this rebuild as part of an rmw, go through
1848                  * and set all of our private rbio pages in the
1849                  * failed stripes as uptodate.  This way finish_rmw will
1850                  * know they can be trusted.  If this was a read reconstruction,
1851                  * other endio functions will fiddle the uptodate bits
1852                  */
1853                 if (!rbio->read_rebuild) {
1854                         for (i = 0;  i < nr_pages; i++) {
1855                                 if (faila != -1) {
1856                                         page = rbio_stripe_page(rbio, faila, i);
1857                                         SetPageUptodate(page);
1858                                 }
1859                                 if (failb != -1) {
1860                                         page = rbio_stripe_page(rbio, failb, i);
1861                                         SetPageUptodate(page);
1862                                 }
1863                         }
1864                 }
1865                 for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
1866                         /*
1867                          * if we're rebuilding a read, we have to use
1868                          * pages from the bio list
1869                          */
1870                         if (rbio->read_rebuild &&
1871                             (stripe == faila || stripe == failb)) {
1872                                 page = page_in_rbio(rbio, stripe, pagenr, 0);
1873                         } else {
1874                                 page = rbio_stripe_page(rbio, stripe, pagenr);
1875                         }
1876                         kunmap(page);
1877                 }
1878         }
1879 
1880         err = 0;
1881 cleanup:
1882         kfree(pointers);
1883 
1884 cleanup_io:
1885 
1886         if (rbio->read_rebuild) {
1887                 if (err == 0)
1888                         cache_rbio_pages(rbio);
1889                 else
1890                         clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
1891 
1892                 rbio_orig_end_io(rbio, err, err == 0);
1893         } else if (err == 0) {
1894                 rbio->faila = -1;
1895                 rbio->failb = -1;
1896                 finish_rmw(rbio);
1897         } else {
1898                 rbio_orig_end_io(rbio, err, 0);
1899         }
1900 }
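/*
 * Illustrative sketch (not part of raid56.c): the case analysis above
 * selects the recovery routine as follows: two bad data stripes ->
 * raid6_2data_recov(), one bad data stripe plus a bad P -> raid6_datap_recov(),
 * one bad data stripe with P intact (raid5, or raid6 with Q as the other
 * casualty) -> the pstripe xor path, and P plus Q both bad (or P alone on
 * raid6, see the TODO) -> -EIO.  The pstripe path itself is plain RAID5
 * algebra: since P = D0 ^ D1 ^ ... ^ D(n-1), the missing block is P xor'd
 * with every surviving data block.  A simplified version of that step,
 * with a made-up name and all buffers assumed to be 'len' bytes long:
 */
static void example_raid5_rebuild(void *missing, void * const *surviving,
                                  const void *parity, int nr_surviving,
                                  size_t len)
{
        size_t off;
        int i;

        /* start from the parity block ... */
        memcpy(missing, parity, len);
        /* ... and fold in each surviving data block */
        for (i = 0; i < nr_surviving; i++)
                for (off = 0; off < len; off++)
                        ((u8 *)missing)[off] ^=
                                ((const u8 *)surviving[i])[off];
}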
1901 
1902 /*
1903  * This is called only for stripes we've read from disk to
1904  * reconstruct the parity.
1905  */
1906 static void raid_recover_end_io(struct bio *bio, int err)
1907 {
1908         struct btrfs_raid_bio *rbio = bio->bi_private;
1909 
1910         /*
1911          * we only read stripe pages off the disk, set them
1912          * up to date if there were no errors
1913          */
1914         if (err)
1915                 fail_bio_stripe(rbio, bio);
1916         else
1917                 set_bio_pages_uptodate(bio);
1918         bio_put(bio);
1919 
1920         if (!atomic_dec_and_test(&rbio->bbio->stripes_pending))
1921                 return;
1922 
1923         if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors)
1924                 rbio_orig_end_io(rbio, -EIO, 0);
1925         else
1926                 __raid_recover_end_io(rbio);
1927 }
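/*
 * Illustrative note (not part of raid56.c): stripes_pending is what backs
 * the "bbio may be freed once we submit the last bio" warnings in this
 * file.  The submitter primes it with the number of bios before the first
 * submit_bio(); each completion decrements it, and only the completion
 * that drops it to zero (atomic_dec_and_test() returning true) is allowed
 * to look at bbio->error and finish or reconstruct the rbio.
 */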
1928 
1929 /*
1930  * reads everything we need off the disk to reconstruct
1931  * the parity. endio handlers trigger final reconstruction
1932  * when the IO is done.
1933  *
1934  * This is used both for reads from the higher layers and for
1935  * parity construction required to finish an rmw cycle.
1936  */
1937 static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
1938 {
1939         int bios_to_read = 0;
1940         struct btrfs_bio *bbio = rbio->bbio;
1941         struct bio_list bio_list;
1942         int ret;
1943         int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1944         int pagenr;
1945         int stripe;
1946         struct bio *bio;
1947 
1948         bio_list_init(&bio_list);
1949 
1950         ret = alloc_rbio_pages(rbio);
1951         if (ret)
1952                 goto cleanup;
1953 
1954         atomic_set(&rbio->bbio->error, 0);
1955 
1956         /*
1957          * read everything that hasn't failed.  Thanks to the
1958          * stripe cache, it is possible that some or all of these
1959          * pages are going to be uptodate.
1960          */
1961         for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
1962                 if (rbio->faila == stripe ||
1963                     rbio->failb == stripe)
1964                         continue;
1965 
1966                 for (pagenr = 0; pagenr < nr_pages; pagenr++) {
1967                         struct page *p;
1968 
1969                         /*
1970                          * the rmw code may have already read this
1971                          * page in
1972                          */
1973                         p = rbio_stripe_page(rbio, stripe, pagenr);
1974                         if (PageUptodate(p))
1975                                 continue;
1976 
1977                         ret = rbio_add_io_page(rbio, &bio_list,
1978                                        rbio_stripe_page(rbio, stripe, pagenr),
1979                                        stripe, pagenr, rbio->stripe_len);
1980                         if (ret < 0)
1981                                 goto cleanup;
1982                 }
1983         }
1984 
1985         bios_to_read = bio_list_size(&bio_list);
1986         if (!bios_to_read) {
1987                 /*
1988                  * we might have no bios to read just because the pages
1989                  * were up to date, or we might have no bios to read because
1990                  * the devices were gone.
1991                  */
1992                 if (atomic_read(&rbio->bbio->error) <= rbio->bbio->max_errors) {
1993                         __raid_recover_end_io(rbio);
1994                         goto out;
1995                 } else {
1996                         goto cleanup;
1997                 }
1998         }
1999 
2000         /*
2001          * the bbio may be freed once we submit the last bio.  Make sure
2002          * not to touch it after that
2003          */
2004         atomic_set(&bbio->stripes_pending, bios_to_read);
2005         while (1) {
2006                 bio = bio_list_pop(&bio_list);
2007                 if (!bio)
2008                         break;
2009 
2010                 bio->bi_private = rbio;
2011                 bio->bi_end_io = raid_recover_end_io;
2012 
2013                 btrfs_bio_wq_end_io(rbio->fs_info, bio,
2014                                     BTRFS_WQ_ENDIO_RAID56);
2015 
2016                 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
2017                 submit_bio(READ, bio);
2018         }
2019 out:
2020         return 0;
2021 
2022 cleanup:
2023         if (rbio->read_rebuild)
2024                 rbio_orig_end_io(rbio, -EIO, 0);
2025         return -EIO;
2026 }
2027 
2028 /*
2029  * the main entry point for reads from the higher layers.  This
2030  * is really only called when the normal read path had a failure,
2031  * so we assume the bio they send down corresponds to a failed part
2032  * of the drive.
2033  */
2034 int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2035                           struct btrfs_bio *bbio, u64 *raid_map,
2036                           u64 stripe_len, int mirror_num)
2037 {
2038         struct btrfs_raid_bio *rbio;
2039         int ret;
2040 
2041         rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
2042         if (IS_ERR(rbio))
2043                 return PTR_ERR(rbio);
2044 
2045         rbio->read_rebuild = 1;
2046         bio_list_add(&rbio->bio_list, bio);
2047         rbio->bio_list_bytes = bio->bi_size;
2048 
2049         rbio->faila = find_logical_bio_stripe(rbio, bio);
2050         if (rbio->faila == -1) {
2051                 BUG();
2052                 kfree(raid_map);
2053                 kfree(bbio);
2054                 kfree(rbio);
2055                 return -EIO;
2056         }
2057 
2058         /*
2059          * reconstruct from the q stripe if they are
2060          * asking for mirror 3
2061          */
2062         if (mirror_num == 3)
2063                 rbio->failb = bbio->num_stripes - 2;
2064 
2065         ret = lock_stripe_add(rbio);
2066 
2067         /*
2068          * __raid56_parity_recover will end the bio with
2069          * any errors it hits.  We don't want to return
2070          * its error value up the stack because our caller
2071          * will end up calling bio_endio with any nonzero
2072          * return
2073          */
2074         if (ret == 0)
2075                 __raid56_parity_recover(rbio);
2076         /*
2077          * our rbio has been added to the list of
2078          * rbios that will be handled after the
2079          * current lock owner is done
2080          */
2081         return 0;
2082 
2083 }
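/*
 * Illustrative example (not part of raid56.c): for a RAID6 chunk striped
 * over four data devices, bbio->num_stripes is 6 and the sorted raid_map
 * ends with RAID5_P_STRIPE at index 4 and RAID6_Q_STRIPE at index 5.
 * mirror_num == 3 then sets rbio->failb = 4, deliberately marking the P
 * stripe as failed so that __raid_recover_end_io() takes the
 * raid6_datap_recov() path and rebuilds the requested data from Q rather
 * than from P.
 */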
2084 
2085 static void rmw_work(struct btrfs_work *work)
2086 {
2087         struct btrfs_raid_bio *rbio;
2088 
2089         rbio = container_of(work, struct btrfs_raid_bio, work);
2090         raid56_rmw_stripe(rbio);
2091 }
2092 
2093 static void read_rebuild_work(struct btrfs_work *work)
2094 {
2095         struct btrfs_raid_bio *rbio;
2096 
2097         rbio = container_of(work, struct btrfs_raid_bio, work);
2098         __raid56_parity_recover(rbio);
2099 }
2100 
