~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/dlm/recoverd.c

Version: ~ [ linux-5.4.2 ] ~ [ linux-5.3.15 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.88 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.158 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.206 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.206 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.78 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /******************************************************************************
  2 *******************************************************************************
  3 **
  4 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
  5 **  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
  6 **
  7 **  This copyrighted material is made available to anyone wishing to use,
  8 **  modify, copy, or redistribute it subject to the terms and conditions
  9 **  of the GNU General Public License v.2.
 10 **
 11 *******************************************************************************
 12 ******************************************************************************/
 13 
 14 #include "dlm_internal.h"
 15 #include "lockspace.h"
 16 #include "member.h"
 17 #include "dir.h"
 18 #include "ast.h"
 19 #include "recover.h"
 20 #include "lowcomms.h"
 21 #include "lock.h"
 22 #include "requestqueue.h"
 23 #include "recoverd.h"
 24 
 25 
 26 /* If the start for which we're re-enabling locking (seq) has been superseded
 27    by a newer stop (ls_recover_seq), we need to leave locking disabled.
 28 
 29    We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
 30    locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
 31    enables locking and clears the requestqueue between a and b. */
 32 
 33 static int enable_locking(struct dlm_ls *ls, uint64_t seq)
 34 {
 35         int error = -EINTR;
 36 
 37         down_write(&ls->ls_recv_active);
 38 
 39         spin_lock(&ls->ls_recover_lock);
 40         if (ls->ls_recover_seq == seq) {
 41                 set_bit(LSFL_RUNNING, &ls->ls_flags);
 42                 /* unblocks processes waiting to enter the dlm */
 43                 up_write(&ls->ls_in_recovery);
 44                 clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
 45                 error = 0;
 46         }
 47         spin_unlock(&ls->ls_recover_lock);
 48 
 49         up_write(&ls->ls_recv_active);
 50         return error;
 51 }
 52 
 53 static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 54 {
 55         unsigned long start;
 56         int error, neg = 0;
 57 
 58         log_debug(ls, "dlm_recover %llu", (unsigned long long)rv->seq);
 59 
 60         mutex_lock(&ls->ls_recoverd_active);
 61 
 62         dlm_callback_suspend(ls);
 63 
 64         dlm_clear_toss(ls);
 65 
 66         /*
 67          * This list of root rsb's will be the basis of most of the recovery
 68          * routines.
 69          */
 70 
 71         dlm_create_root_list(ls);
 72 
 73         /*
 74          * Add or remove nodes from the lockspace's ls_nodes list.
 75          */
 76 
 77         error = dlm_recover_members(ls, rv, &neg);
 78         if (error) {
 79                 log_debug(ls, "dlm_recover_members error %d", error);
 80                 goto fail;
 81         }
 82 
 83         dlm_recover_dir_nodeid(ls);
 84 
 85         ls->ls_recover_dir_sent_res = 0;
 86         ls->ls_recover_dir_sent_msg = 0;
 87         ls->ls_recover_locks_in = 0;
 88 
 89         dlm_set_recover_status(ls, DLM_RS_NODES);
 90 
 91         error = dlm_recover_members_wait(ls);
 92         if (error) {
 93                 log_debug(ls, "dlm_recover_members_wait error %d", error);
 94                 goto fail;
 95         }
 96 
 97         start = jiffies;
 98 
 99         /*
100          * Rebuild our own share of the directory by collecting from all other
101          * nodes their master rsb names that hash to us.
102          */
103 
104         error = dlm_recover_directory(ls);
105         if (error) {
106                 log_debug(ls, "dlm_recover_directory error %d", error);
107                 goto fail;
108         }
109 
110         dlm_set_recover_status(ls, DLM_RS_DIR);
111 
112         error = dlm_recover_directory_wait(ls);
113         if (error) {
114                 log_debug(ls, "dlm_recover_directory_wait error %d", error);
115                 goto fail;
116         }
117 
118         log_debug(ls, "dlm_recover_directory %u out %u messages",
119                   ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg);
120 
121         /*
122          * We may have outstanding operations that are waiting for a reply from
123          * a failed node.  Mark these to be resent after recovery.  Unlock and
124          * cancel ops can just be completed.
125          */
126 
127         dlm_recover_waiters_pre(ls);
128 
129         error = dlm_recovery_stopped(ls);
130         if (error)
131                 goto fail;
132 
133         if (neg || dlm_no_directory(ls)) {
134                 /*
135                  * Clear lkb's for departed nodes.
136                  */
137 
138                 dlm_recover_purge(ls);
139 
140                 /*
141                  * Get new master nodeid's for rsb's that were mastered on
142                  * departed nodes.
143                  */
144 
145                 error = dlm_recover_masters(ls);
146                 if (error) {
147                         log_debug(ls, "dlm_recover_masters error %d", error);
148                         goto fail;
149                 }
150 
151                 /*
152                  * Send our locks on remastered rsb's to the new masters.
153                  */
154 
155                 error = dlm_recover_locks(ls);
156                 if (error) {
157                         log_debug(ls, "dlm_recover_locks error %d", error);
158                         goto fail;
159                 }
160 
161                 dlm_set_recover_status(ls, DLM_RS_LOCKS);
162 
163                 error = dlm_recover_locks_wait(ls);
164                 if (error) {
165                         log_debug(ls, "dlm_recover_locks_wait error %d", error);
166                         goto fail;
167                 }
168 
169                 log_debug(ls, "dlm_recover_locks %u in",
170                           ls->ls_recover_locks_in);
171 
172                 /*
173                  * Finalize state in master rsb's now that all locks can be
174                  * checked.  This includes conversion resolution and lvb
175                  * settings.
176                  */
177 
178                 dlm_recover_rsbs(ls);
179         } else {
180                 /*
181                  * Other lockspace members may be going through the "neg" steps
182                  * while also adding us to the lockspace, in which case they'll
183                  * be doing the recover_locks (RS_LOCKS) barrier.
184                  */
185                 dlm_set_recover_status(ls, DLM_RS_LOCKS);
186 
187                 error = dlm_recover_locks_wait(ls);
188                 if (error) {
189                         log_debug(ls, "dlm_recover_locks_wait error %d", error);
190                         goto fail;
191                 }
192         }
193 
194         dlm_release_root_list(ls);
195 
196         /*
197          * Purge directory-related requests that are saved in requestqueue.
198          * All dir requests from before recovery are invalid now due to the dir
199          * rebuild and will be resent by the requesting nodes.
200          */
201 
202         dlm_purge_requestqueue(ls);
203 
204         dlm_set_recover_status(ls, DLM_RS_DONE);
205 
206         error = dlm_recover_done_wait(ls);
207         if (error) {
208                 log_debug(ls, "dlm_recover_done_wait error %d", error);
209                 goto fail;
210         }
211 
212         dlm_clear_members_gone(ls);
213 
214         dlm_adjust_timeouts(ls);
215 
216         dlm_callback_resume(ls);
217 
218         error = enable_locking(ls, rv->seq);
219         if (error) {
220                 log_debug(ls, "enable_locking error %d", error);
221                 goto fail;
222         }
223 
224         error = dlm_process_requestqueue(ls);
225         if (error) {
226                 log_debug(ls, "dlm_process_requestqueue error %d", error);
227                 goto fail;
228         }
229 
230         error = dlm_recover_waiters_post(ls);
231         if (error) {
232                 log_debug(ls, "dlm_recover_waiters_post error %d", error);
233                 goto fail;
234         }
235 
236         dlm_recover_grant(ls);
237 
238         log_debug(ls, "dlm_recover %llu generation %u done: %u ms",
239                   (unsigned long long)rv->seq, ls->ls_generation,
240                   jiffies_to_msecs(jiffies - start));
241         mutex_unlock(&ls->ls_recoverd_active);
242 
243         dlm_lsop_recover_done(ls);
244         return 0;
245 
246  fail:
247         dlm_release_root_list(ls);
248         log_debug(ls, "dlm_recover %llu error %d",
249                   (unsigned long long)rv->seq, error);
250         mutex_unlock(&ls->ls_recoverd_active);
251         return error;
252 }
253 
254 /* The dlm_ls_start() that created the rv we take here may already have been
255    stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
256    flag set. */
257 
258 static void do_ls_recovery(struct dlm_ls *ls)
259 {
260         struct dlm_recover *rv = NULL;
261 
262         spin_lock(&ls->ls_recover_lock);
263         rv = ls->ls_recover_args;
264         ls->ls_recover_args = NULL;
265         if (rv && ls->ls_recover_seq == rv->seq)
266                 clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
267         spin_unlock(&ls->ls_recover_lock);
268 
269         if (rv) {
270                 ls_recover(ls, rv);
271                 kfree(rv->nodes);
272                 kfree(rv);
273         }
274 }
275 
276 static int dlm_recoverd(void *arg)
277 {
278         struct dlm_ls *ls;
279 
280         ls = dlm_find_lockspace_local(arg);
281         if (!ls) {
282                 log_print("dlm_recoverd: no lockspace %p", arg);
283                 return -1;
284         }
285 
286         down_write(&ls->ls_in_recovery);
287         set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
288         wake_up(&ls->ls_recover_lock_wait);
289 
290         while (!kthread_should_stop()) {
291                 set_current_state(TASK_INTERRUPTIBLE);
292                 if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
293                     !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
294                         schedule();
295                 set_current_state(TASK_RUNNING);
296 
297                 if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
298                         down_write(&ls->ls_in_recovery);
299                         set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
300                         wake_up(&ls->ls_recover_lock_wait);
301                 }
302 
303                 if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
304                         do_ls_recovery(ls);
305         }
306 
307         if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
308                 up_write(&ls->ls_in_recovery);
309 
310         dlm_put_lockspace(ls);
311         return 0;
312 }
313 
314 int dlm_recoverd_start(struct dlm_ls *ls)
315 {
316         struct task_struct *p;
317         int error = 0;
318 
319         p = kthread_run(dlm_recoverd, ls, "dlm_recoverd");
320         if (IS_ERR(p))
321                 error = PTR_ERR(p);
322         else
323                 ls->ls_recoverd_task = p;
324         return error;
325 }
326 
327 void dlm_recoverd_stop(struct dlm_ls *ls)
328 {
329         kthread_stop(ls->ls_recoverd_task);
330 }
331 
332 void dlm_recoverd_suspend(struct dlm_ls *ls)
333 {
334         wake_up(&ls->ls_wait_general);
335         mutex_lock(&ls->ls_recoverd_active);
336 }
337 
338 void dlm_recoverd_resume(struct dlm_ls *ls)
339 {
340         mutex_unlock(&ls->ls_recoverd_active);
341 }
342 
343 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp