~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/jbd/commit.c

Version: ~ [ linux-5.4-rc7 ] ~ [ linux-5.3.11 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.84 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.154 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.201 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.201 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.77 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * linux/fs/jbd/commit.c
  3  *
  4  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
  5  *
  6  * Copyright 1998 Red Hat corp --- All Rights Reserved
  7  *
  8  * This file is part of the Linux kernel and is made available under
  9  * the terms of the GNU General Public License, version 2, or at your
 10  * option, any later version, incorporated herein by reference.
 11  *
 12  * Journal commit routines for the generic filesystem journaling code;
 13  * part of the ext2fs journaling system.
 14  */
 15 
 16 #include <linux/time.h>
 17 #include <linux/fs.h>
 18 #include <linux/jbd.h>
 19 #include <linux/errno.h>
 20 #include <linux/slab.h>
 21 #include <linux/mm.h>
 22 #include <linux/pagemap.h>
 23 #include <linux/bio.h>
 24 
 25 /*
 26  * Default IO end handler for temporary BJ_IO buffer_heads.
 27  */
 28 static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 29 {
 30         BUFFER_TRACE(bh, "");
 31         if (uptodate)
 32                 set_buffer_uptodate(bh);
 33         else
 34                 clear_buffer_uptodate(bh);
 35         unlock_buffer(bh);
 36 }
 37 
 38 /*
 39  * When an ext3-ordered file is truncated, it is possible that many pages are
 40  * not successfully freed, because they are attached to a committing transaction.
 41  * After the transaction commits, these pages are left on the LRU, with no
 42  * ->mapping, and with attached buffers.  These pages are trivially reclaimable
 43  * by the VM, but their apparent absence upsets the VM accounting, and it makes
 44  * the numbers in /proc/meminfo look odd.
 45  *
 46  * So here, we have a buffer which has just come off the forget list.  Look to
 47  * see if we can strip all buffers from the backing page.
 48  *
 49  * Called under journal->j_list_lock.  The caller provided us with a ref
 50  * against the buffer, and we drop that here.
 51  */
 52 static void release_buffer_page(struct buffer_head *bh)
 53 {
 54         struct page *page;
 55 
 56         if (buffer_dirty(bh))
 57                 goto nope;
 58         if (atomic_read(&bh->b_count) != 1)
 59                 goto nope;
 60         page = bh->b_page;
 61         if (!page)
 62                 goto nope;
 63         if (page->mapping)
 64                 goto nope;
 65 
 66         /* OK, it's a truncated page */
 67         if (!trylock_page(page))
 68                 goto nope;
 69 
 70         page_cache_get(page);
 71         __brelse(bh);
 72         try_to_free_buffers(page);
 73         unlock_page(page);
 74         page_cache_release(page);
 75         return;
 76 
 77 nope:
 78         __brelse(bh);
 79 }
 80 
 81 /*
 82  * Decrement reference counter for data buffer. If it has been marked
 83  * 'BH_Freed', release it and the page to which it belongs if possible.
 84  */
 85 static void release_data_buffer(struct buffer_head *bh)
 86 {
 87         if (buffer_freed(bh)) {
 88                 WARN_ON_ONCE(buffer_dirty(bh));
 89                 clear_buffer_freed(bh);
 90                 clear_buffer_mapped(bh);
 91                 clear_buffer_new(bh);
 92                 clear_buffer_req(bh);
 93                 bh->b_bdev = NULL;
 94                 release_buffer_page(bh);
 95         } else
 96                 put_bh(bh);
 97 }
 98 
 99 /*
100  * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
101  * held.  For ranking reasons we must trylock.  If we lose, schedule away and
102  * return 0.  j_list_lock is dropped in this case.
103  */
104 static int inverted_lock(journal_t *journal, struct buffer_head *bh)
105 {
106         if (!jbd_trylock_bh_state(bh)) {
107                 spin_unlock(&journal->j_list_lock);
108                 schedule();
109                 return 0;
110         }
111         return 1;
112 }
113 
114 /* Done it all: now write the commit record.  We should have
115  * cleaned up our previous buffers by now, so if we are in abort
116  * mode we can now just skip the rest of the journal write
117  * entirely.
118  *
119  * Returns 1 if the journal needs to be aborted or 0 on success
120  */
static int journal_write_commit_record(journal_t *journal,
					transaction_t *commit_transaction)
{
	struct journal_head *descriptor;
	struct buffer_head *bh;
	journal_header_t *header;
	int ret;
	int barrier_done = 0;	/* remember whether WE issued a barrier */

	/* Nothing to write if the journal has already been aborted. */
	if (is_journal_aborted(journal))
		return 0;

	/* Allocate a journal block to hold the commit record itself. */
	descriptor = journal_get_descriptor_buffer(journal);
	if (!descriptor)
		return 1;

	bh = jh2bh(descriptor);

	/* Fill in the commit block header for this transaction. */
	header = (journal_header_t *)(bh->b_data);
	header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
	header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
	header->h_sequence = cpu_to_be32(commit_transaction->t_tid);

	JBUFFER_TRACE(descriptor, "write commit block");
	set_buffer_dirty(bh);
	/*
	 * If barriers are enabled, mark this write ordered so the commit
	 * block cannot reach the platter before the preceding journal
	 * blocks.  The flag must be cleared again after the write.
	 */
	if (journal->j_flags & JFS_BARRIER) {
		set_buffer_ordered(bh);
		barrier_done = 1;
	}
	ret = sync_dirty_buffer(bh);
	if (barrier_done)
		clear_buffer_ordered(bh);
	/* is it possible for another commit to fail at roughly
	 * the same time as this one?  If so, we don't want to
	 * trust the barrier flag in the super, but instead want
	 * to remember if we sent a barrier request
	 */
	if (ret == -EOPNOTSUPP && barrier_done) {
		char b[BDEVNAME_SIZE];

		printk(KERN_WARNING
			"JBD: barrier-based sync failed on %s - "
			"disabling barriers\n",
			bdevname(journal->j_dev, b));
		/* Remember the device can't do barriers: disable globally. */
		spin_lock(&journal->j_state_lock);
		journal->j_flags &= ~JFS_BARRIER;
		spin_unlock(&journal->j_state_lock);

		/* And try again, without the barrier */
		set_buffer_uptodate(bh);
		set_buffer_dirty(bh);
		ret = sync_dirty_buffer(bh);
	}
	put_bh(bh);		/* One for getblk() */
	journal_put_journal_head(descriptor);

	/* Only a real IO error aborts the journal; EOPNOTSUPP was retried. */
	return (ret == -EIO);
}
179 
180 static void journal_do_submit_data(struct buffer_head **wbuf, int bufs,
181                                    int write_op)
182 {
183         int i;
184 
185         for (i = 0; i < bufs; i++) {
186                 wbuf[i]->b_end_io = end_buffer_write_sync;
187                 /* We use-up our safety reference in submit_bh() */
188                 submit_bh(write_op, wbuf[i]);
189         }
190 }
191 
192 /*
193  *  Submit all the data buffers to disk
194  */
static int journal_submit_data_buffers(journal_t *journal,
				       transaction_t *commit_transaction,
				       int write_op)
{
	struct journal_head *jh;
	struct buffer_head *bh;
	int locked;			/* do we hold the buffer lock? */
	int bufs = 0;			/* buffers batched in wbuf[] */
	struct buffer_head **wbuf = journal->j_wbuf;
	int err = 0;			/* sticky -EIO if any buffer failed */

	/*
	 * Whenever we unlock the journal and sleep, things can get added
	 * onto ->t_sync_datalist, so we have to keep looping back to
	 * write_out_data until we *know* that the list is empty.
	 *
	 * Cleanup any flushed data buffers from the data list.  Even in
	 * abort mode, we want to flush this out as soon as possible.
	 */
write_out_data:
	cond_resched();
	spin_lock(&journal->j_list_lock);

	while (commit_transaction->t_sync_datalist) {
		jh = commit_transaction->t_sync_datalist;
		bh = jh2bh(jh);
		locked = 0;

		/* Get reference just to make sure buffer does not disappear
		 * when we are forced to drop various locks */
		get_bh(bh);
		/* If the buffer is dirty, we need to submit IO and hence
		 * we need the buffer lock. We try to lock the buffer without
		 * blocking. If we fail, we need to drop j_list_lock and do
		 * blocking lock_buffer().
		 */
		if (buffer_dirty(bh)) {
			if (!trylock_buffer(bh)) {
				BUFFER_TRACE(bh, "needs blocking lock");
				spin_unlock(&journal->j_list_lock);
				/* Write out all data to prevent deadlocks */
				journal_do_submit_data(wbuf, bufs, write_op);
				bufs = 0;
				lock_buffer(bh);
				spin_lock(&journal->j_list_lock);
			}
			locked = 1;
		}
		/* We have to get bh_state lock. Again out of order, sigh. */
		if (!inverted_lock(journal, bh)) {
			/* Lost the trylock: take both locks the slow way,
			 * in the safe (bh_state first) order. */
			jbd_lock_bh_state(bh);
			spin_lock(&journal->j_list_lock);
		}
		/* Someone already cleaned up the buffer? */
		if (!buffer_jbd(bh) || bh2jh(bh) != jh
			|| jh->b_transaction != commit_transaction
			|| jh->b_jlist != BJ_SyncData) {
			jbd_unlock_bh_state(bh);
			if (locked)
				unlock_buffer(bh);
			BUFFER_TRACE(bh, "already cleaned up");
			release_data_buffer(bh);
			continue;
		}
		/* Dirty buffer we managed to lock: batch it for submission
		 * and move it to BJ_Locked so the wait phase finds it. */
		if (locked && test_clear_buffer_dirty(bh)) {
			BUFFER_TRACE(bh, "needs writeout, adding to array");
			wbuf[bufs++] = bh;
			__journal_file_buffer(jh, commit_transaction,
						BJ_Locked);
			jbd_unlock_bh_state(bh);
			/* Batch full: flush it and rescan from the top. */
			if (bufs == journal->j_wbufsize) {
				spin_unlock(&journal->j_list_lock);
				journal_do_submit_data(wbuf, bufs, write_op);
				bufs = 0;
				goto write_out_data;
			}
		} else if (!locked && buffer_locked(bh)) {
			/* Someone else owns the IO: just park the buffer on
			 * BJ_Locked for the wait phase. */
			__journal_file_buffer(jh, commit_transaction,
						BJ_Locked);
			jbd_unlock_bh_state(bh);
			put_bh(bh);
		} else {
			/* Clean and unlocked: writeout already finished. */
			BUFFER_TRACE(bh, "writeout complete: unfile");
			if (unlikely(!buffer_uptodate(bh)))
				err = -EIO;
			__journal_unfile_buffer(jh);
			jbd_unlock_bh_state(bh);
			if (locked)
				unlock_buffer(bh);
			journal_remove_journal_head(bh);
			/* One for our safety reference, other for
			 * journal_remove_journal_head() */
			put_bh(bh);
			release_data_buffer(bh);
		}

		/* Be nice to the rest of the system: drop the lock and
		 * restart the scan if anyone else needs the CPU or lock. */
		if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
			spin_unlock(&journal->j_list_lock);
			goto write_out_data;
		}
	}
	spin_unlock(&journal->j_list_lock);
	/* Submit whatever is still sitting in the batch. */
	journal_do_submit_data(wbuf, bufs, write_op);

	return err;
}
301 
302 /*
303  * journal_commit_transaction
304  *
305  * The primary function for committing a transaction to the log.  This
306  * function is called by the journal thread to begin a complete commit.
307  */
308 void journal_commit_transaction(journal_t *journal)
309 {
310         transaction_t *commit_transaction;
311         struct journal_head *jh, *new_jh, *descriptor;
312         struct buffer_head **wbuf = journal->j_wbuf;
313         int bufs;
314         int flags;
315         int err;
316         unsigned int blocknr;
317         ktime_t start_time;
318         u64 commit_time;
319         char *tagp = NULL;
320         journal_header_t *header;
321         journal_block_tag_t *tag = NULL;
322         int space_left = 0;
323         int first_tag = 0;
324         int tag_flag;
325         int i;
326         int write_op = WRITE;
327 
328         /*
329          * First job: lock down the current transaction and wait for
330          * all outstanding updates to complete.
331          */
332 
333 #ifdef COMMIT_STATS
334         spin_lock(&journal->j_list_lock);
335         summarise_journal_usage(journal);
336         spin_unlock(&journal->j_list_lock);
337 #endif
338 
339         /* Do we need to erase the effects of a prior journal_flush? */
340         if (journal->j_flags & JFS_FLUSHED) {
341                 jbd_debug(3, "super block updated\n");
342                 journal_update_superblock(journal, 1);
343         } else {
344                 jbd_debug(3, "superblock not updated\n");
345         }
346 
347         J_ASSERT(journal->j_running_transaction != NULL);
348         J_ASSERT(journal->j_committing_transaction == NULL);
349 
350         commit_transaction = journal->j_running_transaction;
351         J_ASSERT(commit_transaction->t_state == T_RUNNING);
352 
353         jbd_debug(1, "JBD: starting commit of transaction %d\n",
354                         commit_transaction->t_tid);
355 
356         spin_lock(&journal->j_state_lock);
357         commit_transaction->t_state = T_LOCKED;
358 
359         /*
360          * Use plugged writes here, since we want to submit several before
361          * we unplug the device. We don't do explicit unplugging in here,
362          * instead we rely on sync_buffer() doing the unplug for us.
363          */
364         if (commit_transaction->t_synchronous_commit)
365                 write_op = WRITE_SYNC_PLUG;
366         spin_lock(&commit_transaction->t_handle_lock);
367         while (commit_transaction->t_updates) {
368                 DEFINE_WAIT(wait);
369 
370                 prepare_to_wait(&journal->j_wait_updates, &wait,
371                                         TASK_UNINTERRUPTIBLE);
372                 if (commit_transaction->t_updates) {
373                         spin_unlock(&commit_transaction->t_handle_lock);
374                         spin_unlock(&journal->j_state_lock);
375                         schedule();
376                         spin_lock(&journal->j_state_lock);
377                         spin_lock(&commit_transaction->t_handle_lock);
378                 }
379                 finish_wait(&journal->j_wait_updates, &wait);
380         }
381         spin_unlock(&commit_transaction->t_handle_lock);
382 
383         J_ASSERT (commit_transaction->t_outstanding_credits <=
384                         journal->j_max_transaction_buffers);
385 
386         /*
387          * First thing we are allowed to do is to discard any remaining
388          * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
389          * that there are no such buffers: if a large filesystem
390          * operation like a truncate needs to split itself over multiple
391          * transactions, then it may try to do a journal_restart() while
392          * there are still BJ_Reserved buffers outstanding.  These must
393          * be released cleanly from the current transaction.
394          *
395          * In this case, the filesystem must still reserve write access
396          * again before modifying the buffer in the new transaction, but
397          * we do not require it to remember exactly which old buffers it
398          * has reserved.  This is consistent with the existing behaviour
399          * that multiple journal_get_write_access() calls to the same
400          * buffer are perfectly permissable.
401          */
402         while (commit_transaction->t_reserved_list) {
403                 jh = commit_transaction->t_reserved_list;
404                 JBUFFER_TRACE(jh, "reserved, unused: refile");
405                 /*
406                  * A journal_get_undo_access()+journal_release_buffer() may
407                  * leave undo-committed data.
408                  */
409                 if (jh->b_committed_data) {
410                         struct buffer_head *bh = jh2bh(jh);
411 
412                         jbd_lock_bh_state(bh);
413                         jbd_free(jh->b_committed_data, bh->b_size);
414                         jh->b_committed_data = NULL;
415                         jbd_unlock_bh_state(bh);
416                 }
417                 journal_refile_buffer(journal, jh);
418         }
419 
420         /*
421          * Now try to drop any written-back buffers from the journal's
422          * checkpoint lists.  We do this *before* commit because it potentially
423          * frees some memory
424          */
425         spin_lock(&journal->j_list_lock);
426         __journal_clean_checkpoint_list(journal);
427         spin_unlock(&journal->j_list_lock);
428 
429         jbd_debug (3, "JBD: commit phase 1\n");
430 
431         /*
432          * Switch to a new revoke table.
433          */
434         journal_switch_revoke_table(journal);
435 
436         commit_transaction->t_state = T_FLUSH;
437         journal->j_committing_transaction = commit_transaction;
438         journal->j_running_transaction = NULL;
439         start_time = ktime_get();
440         commit_transaction->t_log_start = journal->j_head;
441         wake_up(&journal->j_wait_transaction_locked);
442         spin_unlock(&journal->j_state_lock);
443 
444         jbd_debug (3, "JBD: commit phase 2\n");
445 
446         /*
447          * Now start flushing things to disk, in the order they appear
448          * on the transaction lists.  Data blocks go first.
449          */
450         err = journal_submit_data_buffers(journal, commit_transaction,
451                                           write_op);
452 
453         /*
454          * Wait for all previously submitted IO to complete.
455          */
456         spin_lock(&journal->j_list_lock);
457         while (commit_transaction->t_locked_list) {
458                 struct buffer_head *bh;
459 
460                 jh = commit_transaction->t_locked_list->b_tprev;
461                 bh = jh2bh(jh);
462                 get_bh(bh);
463                 if (buffer_locked(bh)) {
464                         spin_unlock(&journal->j_list_lock);
465                         wait_on_buffer(bh);
466                         spin_lock(&journal->j_list_lock);
467                 }
468                 if (unlikely(!buffer_uptodate(bh))) {
469                         if (!trylock_page(bh->b_page)) {
470                                 spin_unlock(&journal->j_list_lock);
471                                 lock_page(bh->b_page);
472                                 spin_lock(&journal->j_list_lock);
473                         }
474                         if (bh->b_page->mapping)
475                                 set_bit(AS_EIO, &bh->b_page->mapping->flags);
476 
477                         unlock_page(bh->b_page);
478                         SetPageError(bh->b_page);
479                         err = -EIO;
480                 }
481                 if (!inverted_lock(journal, bh)) {
482                         put_bh(bh);
483                         spin_lock(&journal->j_list_lock);
484                         continue;
485                 }
486                 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
487                     jh->b_transaction == commit_transaction &&
488                     jh->b_jlist == BJ_Locked) {
489                         __journal_unfile_buffer(jh);
490                         jbd_unlock_bh_state(bh);
491                         journal_remove_journal_head(bh);
492                         put_bh(bh);
493                 } else {
494                         jbd_unlock_bh_state(bh);
495                 }
496                 release_data_buffer(bh);
497                 cond_resched_lock(&journal->j_list_lock);
498         }
499         spin_unlock(&journal->j_list_lock);
500 
501         if (err) {
502                 char b[BDEVNAME_SIZE];
503 
504                 printk(KERN_WARNING
505                         "JBD: Detected IO errors while flushing file data "
506                         "on %s\n", bdevname(journal->j_fs_dev, b));
507                 if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
508                         journal_abort(journal, err);
509                 err = 0;
510         }
511 
512         journal_write_revoke_records(journal, commit_transaction, write_op);
513 
514         /*
515          * If we found any dirty or locked buffers, then we should have
516          * looped back up to the write_out_data label.  If there weren't
517          * any then journal_clean_data_list should have wiped the list
518          * clean by now, so check that it is in fact empty.
519          */
520         J_ASSERT (commit_transaction->t_sync_datalist == NULL);
521 
522         jbd_debug (3, "JBD: commit phase 3\n");
523 
524         /*
525          * Way to go: we have now written out all of the data for a
526          * transaction!  Now comes the tricky part: we need to write out
527          * metadata.  Loop over the transaction's entire buffer list:
528          */
529         spin_lock(&journal->j_state_lock);
530         commit_transaction->t_state = T_COMMIT;
531         spin_unlock(&journal->j_state_lock);
532 
533         J_ASSERT(commit_transaction->t_nr_buffers <=
534                  commit_transaction->t_outstanding_credits);
535 
536         descriptor = NULL;
537         bufs = 0;
538         while (commit_transaction->t_buffers) {
539 
540                 /* Find the next buffer to be journaled... */
541 
542                 jh = commit_transaction->t_buffers;
543 
544                 /* If we're in abort mode, we just un-journal the buffer and
545                    release it. */
546 
547                 if (is_journal_aborted(journal)) {
548                         clear_buffer_jbddirty(jh2bh(jh));
549                         JBUFFER_TRACE(jh, "journal is aborting: refile");
550                         journal_refile_buffer(journal, jh);
551                         /* If that was the last one, we need to clean up
552                          * any descriptor buffers which may have been
553                          * already allocated, even if we are now
554                          * aborting. */
555                         if (!commit_transaction->t_buffers)
556                                 goto start_journal_io;
557                         continue;
558                 }
559 
560                 /* Make sure we have a descriptor block in which to
561                    record the metadata buffer. */
562 
563                 if (!descriptor) {
564                         struct buffer_head *bh;
565 
566                         J_ASSERT (bufs == 0);
567 
568                         jbd_debug(4, "JBD: get descriptor\n");
569 
570                         descriptor = journal_get_descriptor_buffer(journal);
571                         if (!descriptor) {
572                                 journal_abort(journal, -EIO);
573                                 continue;
574                         }
575 
576                         bh = jh2bh(descriptor);
577                         jbd_debug(4, "JBD: got buffer %llu (%p)\n",
578                                 (unsigned long long)bh->b_blocknr, bh->b_data);
579                         header = (journal_header_t *)&bh->b_data[0];
580                         header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
581                         header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
582                         header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);
583 
584                         tagp = &bh->b_data[sizeof(journal_header_t)];
585                         space_left = bh->b_size - sizeof(journal_header_t);
586                         first_tag = 1;
587                         set_buffer_jwrite(bh);
588                         set_buffer_dirty(bh);
589                         wbuf[bufs++] = bh;
590 
591                         /* Record it so that we can wait for IO
592                            completion later */
593                         BUFFER_TRACE(bh, "ph3: file as descriptor");
594                         journal_file_buffer(descriptor, commit_transaction,
595                                         BJ_LogCtl);
596                 }
597 
598                 /* Where is the buffer to be written? */
599 
600                 err = journal_next_log_block(journal, &blocknr);
601                 /* If the block mapping failed, just abandon the buffer
602                    and repeat this loop: we'll fall into the
603                    refile-on-abort condition above. */
604                 if (err) {
605                         journal_abort(journal, err);
606                         continue;
607                 }
608 
609                 /*
610                  * start_this_handle() uses t_outstanding_credits to determine
611                  * the free space in the log, but this counter is changed
612                  * by journal_next_log_block() also.
613                  */
614                 commit_transaction->t_outstanding_credits--;
615 
616                 /* Bump b_count to prevent truncate from stumbling over
617                    the shadowed buffer!  @@@ This can go if we ever get
618                    rid of the BJ_IO/BJ_Shadow pairing of buffers. */
619                 atomic_inc(&jh2bh(jh)->b_count);
620 
621                 /* Make a temporary IO buffer with which to write it out
622                    (this will requeue both the metadata buffer and the
623                    temporary IO buffer). new_bh goes on BJ_IO*/
624 
625                 set_bit(BH_JWrite, &jh2bh(jh)->b_state);
626                 /*
627                  * akpm: journal_write_metadata_buffer() sets
628                  * new_bh->b_transaction to commit_transaction.
629                  * We need to clean this up before we release new_bh
630                  * (which is of type BJ_IO)
631                  */
632                 JBUFFER_TRACE(jh, "ph3: write metadata");
633                 flags = journal_write_metadata_buffer(commit_transaction,
634                                                       jh, &new_jh, blocknr);
635                 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
636                 wbuf[bufs++] = jh2bh(new_jh);
637 
638                 /* Record the new block's tag in the current descriptor
639                    buffer */
640 
641                 tag_flag = 0;
642                 if (flags & 1)
643                         tag_flag |= JFS_FLAG_ESCAPE;
644                 if (!first_tag)
645                         tag_flag |= JFS_FLAG_SAME_UUID;
646 
647                 tag = (journal_block_tag_t *) tagp;
648                 tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
649                 tag->t_flags = cpu_to_be32(tag_flag);
650                 tagp += sizeof(journal_block_tag_t);
651                 space_left -= sizeof(journal_block_tag_t);
652 
653                 if (first_tag) {
654                         memcpy (tagp, journal->j_uuid, 16);
655                         tagp += 16;
656                         space_left -= 16;
657                         first_tag = 0;
658                 }
659 
660                 /* If there's no more to do, or if the descriptor is full,
661                    let the IO rip! */
662 
663                 if (bufs == journal->j_wbufsize ||
664                     commit_transaction->t_buffers == NULL ||
665                     space_left < sizeof(journal_block_tag_t) + 16) {
666 
667                         jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
668 
669                         /* Write an end-of-descriptor marker before
670                            submitting the IOs.  "tag" still points to
671                            the last tag we set up. */
672 
673                         tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
674 
675 start_journal_io:
676                         for (i = 0; i < bufs; i++) {
677                                 struct buffer_head *bh = wbuf[i];
678                                 lock_buffer(bh);
679                                 clear_buffer_dirty(bh);
680                                 set_buffer_uptodate(bh);
681                                 bh->b_end_io = journal_end_buffer_io_sync;
682                                 submit_bh(write_op, bh);
683                         }
684                         cond_resched();
685 
686                         /* Force a new descriptor to be generated next
687                            time round the loop. */
688                         descriptor = NULL;
689                         bufs = 0;
690                 }
691         }
692 
693         /* Lo and behold: we have just managed to send a transaction to
694            the log.  Before we can commit it, wait for the IO so far to
695            complete.  Control buffers being written are on the
696            transaction's t_log_list queue, and metadata buffers are on
697            the t_iobuf_list queue.
698 
699            Wait for the buffers in reverse order.  That way we are
700            less likely to be woken up until all IOs have completed, and
701            so we incur less scheduling load.
702         */
703 
704         jbd_debug(3, "JBD: commit phase 4\n");
705 
706         /*
707          * akpm: these are BJ_IO, and j_list_lock is not needed.
708          * See __journal_try_to_free_buffer.
709          */
710 wait_for_iobuf:
711         while (commit_transaction->t_iobuf_list != NULL) {
712                 struct buffer_head *bh;
713 
714                 jh = commit_transaction->t_iobuf_list->b_tprev;
715                 bh = jh2bh(jh);
716                 if (buffer_locked(bh)) {
717                         wait_on_buffer(bh);
718                         goto wait_for_iobuf;
719                 }
720                 if (cond_resched())
721                         goto wait_for_iobuf;
722 
723                 if (unlikely(!buffer_uptodate(bh)))
724                         err = -EIO;
725 
726                 clear_buffer_jwrite(bh);
727 
728                 JBUFFER_TRACE(jh, "ph4: unfile after journal write");
729                 journal_unfile_buffer(journal, jh);
730 
731                 /*
732                  * ->t_iobuf_list should contain only dummy buffer_heads
733                  * which were created by journal_write_metadata_buffer().
734                  */
735                 BUFFER_TRACE(bh, "dumping temporary bh");
736                 journal_put_journal_head(jh);
737                 __brelse(bh);
738                 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
739                 free_buffer_head(bh);
740 
741                 /* We also have to unlock and free the corresponding
742                    shadowed buffer */
743                 jh = commit_transaction->t_shadow_list->b_tprev;
744                 bh = jh2bh(jh);
745                 clear_bit(BH_JWrite, &bh->b_state);
746                 J_ASSERT_BH(bh, buffer_jbddirty(bh));
747 
748                 /* The metadata is now released for reuse, but we need
749                    to remember it against this transaction so that when
750                    we finally commit, we can do any checkpointing
751                    required. */
752                 JBUFFER_TRACE(jh, "file as BJ_Forget");
753                 journal_file_buffer(jh, commit_transaction, BJ_Forget);
754                 /*
755                  * Wake up any transactions which were waiting for this
756                  * IO to complete. The barrier must be here so that changes
757                  * by journal_file_buffer() take effect before wake_up_bit()
758                  * does the waitqueue check.
759                  */
760                 smp_mb();
761                 wake_up_bit(&bh->b_state, BH_Unshadow);
762                 JBUFFER_TRACE(jh, "brelse shadowed buffer");
763                 __brelse(bh);
764         }
765 
766         J_ASSERT (commit_transaction->t_shadow_list == NULL);
767 
768         jbd_debug(3, "JBD: commit phase 5\n");
769 
770         /* Here we wait for the revoke record and descriptor record buffers */
771  wait_for_ctlbuf:
772         while (commit_transaction->t_log_list != NULL) {
773                 struct buffer_head *bh;
774 
775                 jh = commit_transaction->t_log_list->b_tprev;
776                 bh = jh2bh(jh);
777                 if (buffer_locked(bh)) {
778                         wait_on_buffer(bh);
779                         goto wait_for_ctlbuf;
780                 }
781                 if (cond_resched())
782                         goto wait_for_ctlbuf;
783 
784                 if (unlikely(!buffer_uptodate(bh)))
785                         err = -EIO;
786 
787                 BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
788                 clear_buffer_jwrite(bh);
789                 journal_unfile_buffer(journal, jh);
790                 journal_put_journal_head(jh);
791                 __brelse(bh);           /* One for getblk */
792                 /* AKPM: bforget here */
793         }
794 
795         if (err)
796                 journal_abort(journal, err);
797 
798         jbd_debug(3, "JBD: commit phase 6\n");
799 
800         if (journal_write_commit_record(journal, commit_transaction))
801                 err = -EIO;
802 
803         if (err)
804                 journal_abort(journal, err);
805 
806         /* End of a transaction!  Finally, we can do checkpoint
807            processing: any buffers committed as a result of this
808            transaction can be removed from any checkpoint list it was on
809            before. */
810 
811         jbd_debug(3, "JBD: commit phase 7\n");
812 
813         J_ASSERT(commit_transaction->t_sync_datalist == NULL);
814         J_ASSERT(commit_transaction->t_buffers == NULL);
815         J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
816         J_ASSERT(commit_transaction->t_iobuf_list == NULL);
817         J_ASSERT(commit_transaction->t_shadow_list == NULL);
818         J_ASSERT(commit_transaction->t_log_list == NULL);
819 
820 restart_loop:
821         /*
822          * As there are other places (journal_unmap_buffer()) adding buffers
823          * to this list we have to be careful and hold the j_list_lock.
824          */
825         spin_lock(&journal->j_list_lock);
826         while (commit_transaction->t_forget) {
827                 transaction_t *cp_transaction;
828                 struct buffer_head *bh;
829 
830                 jh = commit_transaction->t_forget;
831                 spin_unlock(&journal->j_list_lock);
832                 bh = jh2bh(jh);
833                 jbd_lock_bh_state(bh);
834                 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
835                         jh->b_transaction == journal->j_running_transaction);
836 
837                 /*
838                  * If there is undo-protected committed data against
839                  * this buffer, then we can remove it now.  If it is a
840                  * buffer needing such protection, the old frozen_data
841                  * field now points to a committed version of the
842                  * buffer, so rotate that field to the new committed
843                  * data.
844                  *
845                  * Otherwise, we can just throw away the frozen data now.
846                  */
847                 if (jh->b_committed_data) {
848                         jbd_free(jh->b_committed_data, bh->b_size);
849                         jh->b_committed_data = NULL;
850                         if (jh->b_frozen_data) {
851                                 jh->b_committed_data = jh->b_frozen_data;
852                                 jh->b_frozen_data = NULL;
853                         }
854                 } else if (jh->b_frozen_data) {
855                         jbd_free(jh->b_frozen_data, bh->b_size);
856                         jh->b_frozen_data = NULL;
857                 }
858 
859                 spin_lock(&journal->j_list_lock);
860                 cp_transaction = jh->b_cp_transaction;
861                 if (cp_transaction) {
862                         JBUFFER_TRACE(jh, "remove from old cp transaction");
863                         __journal_remove_checkpoint(jh);
864                 }
865 
866                 /* Only re-checkpoint the buffer_head if it is marked
867                  * dirty.  If the buffer was added to the BJ_Forget list
868                  * by journal_forget, it may no longer be dirty and
869                  * there's no point in keeping a checkpoint record for
870                  * it. */
871 
872                 /*
873                  * A buffer which has been freed while still being journaled by
874                  * a previous transaction.
875                  */
876                 if (buffer_freed(bh)) {
877                         /*
878                          * If the running transaction is the one containing
879                          * "add to orphan" operation (b_next_transaction !=
880                          * NULL), we have to wait for that transaction to
881                          * commit before we can really get rid of the buffer.
882                          * So just clear b_modified to not confuse transaction
883                          * credit accounting and refile the buffer to
884                          * BJ_Forget of the running transaction. If the just
885                          * committed transaction contains "add to orphan"
886                          * operation, we can completely invalidate the buffer
887                          * now. We are rather throughout in that since the
888                          * buffer may be still accessible when blocksize <
889                          * pagesize and it is attached to the last partial
890                          * page.
891                          */
892                         jh->b_modified = 0;
893                         if (!jh->b_next_transaction) {
894                                 clear_buffer_freed(bh);
895                                 clear_buffer_jbddirty(bh);
896                                 clear_buffer_mapped(bh);
897                                 clear_buffer_new(bh);
898                                 clear_buffer_req(bh);
899                                 bh->b_bdev = NULL;
900                         }
901                 }
902 
903                 if (buffer_jbddirty(bh)) {
904                         JBUFFER_TRACE(jh, "add to new checkpointing trans");
905                         __journal_insert_checkpoint(jh, commit_transaction);
906                         if (is_journal_aborted(journal))
907                                 clear_buffer_jbddirty(bh);
908                         JBUFFER_TRACE(jh, "refile for checkpoint writeback");
909                         __journal_refile_buffer(jh);
910                         jbd_unlock_bh_state(bh);
911                 } else {
912                         J_ASSERT_BH(bh, !buffer_dirty(bh));
913                         /* The buffer on BJ_Forget list and not jbddirty means
914                          * it has been freed by this transaction and hence it
915                          * could not have been reallocated until this
916                          * transaction has committed. *BUT* it could be
917                          * reallocated once we have written all the data to
918                          * disk and before we process the buffer on BJ_Forget
919                          * list. */
920                         JBUFFER_TRACE(jh, "refile or unfile freed buffer");
921                         __journal_refile_buffer(jh);
922                         if (!jh->b_transaction) {
923                                 jbd_unlock_bh_state(bh);
924                                  /* needs a brelse */
925                                 journal_remove_journal_head(bh);
926                                 release_buffer_page(bh);
927                         } else
928                                 jbd_unlock_bh_state(bh);
929                 }
930                 cond_resched_lock(&journal->j_list_lock);
931         }
932         spin_unlock(&journal->j_list_lock);
933         /*
934          * This is a bit sleazy.  We use j_list_lock to protect transition
935          * of a transaction into T_FINISHED state and calling
936          * __journal_drop_transaction(). Otherwise we could race with
937          * other checkpointing code processing the transaction...
938          */
939         spin_lock(&journal->j_state_lock);
940         spin_lock(&journal->j_list_lock);
941         /*
942          * Now recheck if some buffers did not get attached to the transaction
943          * while the lock was dropped...
944          */
945         if (commit_transaction->t_forget) {
946                 spin_unlock(&journal->j_list_lock);
947                 spin_unlock(&journal->j_state_lock);
948                 goto restart_loop;
949         }
950 
951         /* Done with this transaction! */
952 
953         jbd_debug(3, "JBD: commit phase 8\n");
954 
955         J_ASSERT(commit_transaction->t_state == T_COMMIT);
956 
957         commit_transaction->t_state = T_FINISHED;
958         J_ASSERT(commit_transaction == journal->j_committing_transaction);
959         journal->j_commit_sequence = commit_transaction->t_tid;
960         journal->j_committing_transaction = NULL;
961         commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
962 
963         /*
964          * weight the commit time higher than the average time so we don't
965          * react too strongly to vast changes in commit time
966          */
967         if (likely(journal->j_average_commit_time))
968                 journal->j_average_commit_time = (commit_time*3 +
969                                 journal->j_average_commit_time) / 4;
970         else
971                 journal->j_average_commit_time = commit_time;
972 
973         spin_unlock(&journal->j_state_lock);
974 
975         if (commit_transaction->t_checkpoint_list == NULL &&
976             commit_transaction->t_checkpoint_io_list == NULL) {
977                 __journal_drop_transaction(journal, commit_transaction);
978         } else {
979                 if (journal->j_checkpoint_transactions == NULL) {
980                         journal->j_checkpoint_transactions = commit_transaction;
981                         commit_transaction->t_cpnext = commit_transaction;
982                         commit_transaction->t_cpprev = commit_transaction;
983                 } else {
984                         commit_transaction->t_cpnext =
985                                 journal->j_checkpoint_transactions;
986                         commit_transaction->t_cpprev =
987                                 commit_transaction->t_cpnext->t_cpprev;
988                         commit_transaction->t_cpnext->t_cpprev =
989                                 commit_transaction;
990                         commit_transaction->t_cpprev->t_cpnext =
991                                 commit_transaction;
992                 }
993         }
994         spin_unlock(&journal->j_list_lock);
995 
996         jbd_debug(1, "JBD: commit %d complete, head %d\n",
997                   journal->j_commit_sequence, journal->j_tail_sequence);
998 
999         wake_up(&journal->j_wait_done_commit);
1000 }
1001 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp