~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/ocfs2/stack_user.c

Version: ~ [ linux-5.11 ] ~ [ linux-5.10.17 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.99 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.176 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.221 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.257 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.257 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.85 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* -*- mode: c; c-basic-offset: 8; -*-
  2  * vim: noexpandtab sw=8 ts=8 sts=0:
  3  *
  4  * stack_user.c
  5  *
  6  * Code which interfaces ocfs2 with fs/dlm and a userspace stack.
  7  *
  8  * Copyright (C) 2007 Oracle.  All rights reserved.
  9  *
 10  * This program is free software; you can redistribute it and/or
 11  * modify it under the terms of the GNU General Public
 12  * License as published by the Free Software Foundation, version 2.
 13  *
 14  * This program is distributed in the hope that it will be useful,
 15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 17  * General Public License for more details.
 18  */
 19 
 20 #include <linux/module.h>
 21 #include <linux/fs.h>
 22 #include <linux/miscdevice.h>
 23 #include <linux/mutex.h>
 24 #include <linux/slab.h>
 25 #include <linux/reboot.h>
 26 #include <asm/uaccess.h>
 27 
 28 #include "stackglue.h"
 29 
 30 #include <linux/dlm_plock.h>
 31 
 32 /*
 33  * The control protocol starts with a handshake.  Until the handshake
 34  * is complete, the control device will fail all write(2)s.
 35  *
 36  * The handshake is simple.  First, the client reads until EOF.  Each line
 37  * of output is a supported protocol tag.  All protocol tags are a single
 38  * character followed by a two hex digit version number.  Currently the
 39  * only thing supported is T01, for "Text-based version 0x01".  Next, the
 40  * client writes the version they would like to use, including the newline.
 41  * Thus, the protocol tag is 'T01\n'.  If the version tag written is
 42  * unknown, -EINVAL is returned.  Once the negotiation is complete, the
 43  * client can start sending messages.
 44  *
 45  * The T01 protocol has three messages.  First is the "SETN" message.
 46  * It has the following syntax:
 47  *
 48  *  SETN<space><8-char-hex-nodenum><newline>
 49  *
 50  * This is 14 characters.
 51  *
 52  * The "SETN" message must be the first message following the protocol.
 53  * It tells ocfs2_control the local node number.
 54  *
 55  * Next comes the "SETV" message.  It has the following syntax:
 56  *
 57  *  SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
 58  *
 59  * This is 11 characters.
 60  *
 61  * The "SETV" message sets the filesystem locking protocol version as
 62  * negotiated by the client.  The client negotiates based on the maximum
 63  * version advertised in /sys/fs/ocfs2/max_locking_protocol.  The major
 64  * number from the "SETV" message must match
 65  * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number
 66  * must be less than or equal to ...sp_max_proto.pv_minor.
 67  *
 68  * Once this information has been set, mounts will be allowed.  From this
 69  * point on, the "DOWN" message can be sent for node down notification.
 70  * It has the following syntax:
 71  *
 72  *  DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
 73  *
 74  * eg:
 75  *
 76  *  DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
 77  *
 78  * This is 47 characters.
 79  */
 80 
 81 /*
 82  * Whether or not the client has done the handshake.
 83  * For now, we have just one protocol version.
 84  */
 85 #define OCFS2_CONTROL_PROTO                     "T01\n"
 86 #define OCFS2_CONTROL_PROTO_LEN                 4
 87 
 88 /* Handshake states */
 89 #define OCFS2_CONTROL_HANDSHAKE_INVALID         (0)
 90 #define OCFS2_CONTROL_HANDSHAKE_READ            (1)
 91 #define OCFS2_CONTROL_HANDSHAKE_PROTOCOL        (2)
 92 #define OCFS2_CONTROL_HANDSHAKE_VALID           (3)
 93 
 94 /* Messages */
 95 #define OCFS2_CONTROL_MESSAGE_OP_LEN            4
 96 #define OCFS2_CONTROL_MESSAGE_SETNODE_OP        "SETN"
 97 #define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14
 98 #define OCFS2_CONTROL_MESSAGE_SETVERSION_OP     "SETV"
 99 #define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN      11
100 #define OCFS2_CONTROL_MESSAGE_DOWN_OP           "DOWN"
101 #define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN    47
102 #define OCFS2_TEXT_UUID_LEN                     32
103 #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN        2
104 #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN       8
105 
106 /*
107  * ocfs2_live_connection is refcounted because the filesystem and
108  * miscdevice sides can detach in different order.  Let's just be safe.
109  */
110 struct ocfs2_live_connection {
111         struct list_head                oc_list;
112         struct ocfs2_cluster_connection *oc_conn;
113 };
114 
115 struct ocfs2_control_private {
116         struct list_head op_list;
117         int op_state;
118         int op_this_node;
119         struct ocfs2_protocol_version op_proto;
120 };
121 
122 /* SETN<space><8-char-hex-nodenum><newline> */
123 struct ocfs2_control_message_setn {
124         char    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
125         char    space;
126         char    nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
127         char    newline;
128 };
129 
130 /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
131 struct ocfs2_control_message_setv {
132         char    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
133         char    space1;
134         char    major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
135         char    space2;
136         char    minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
137         char    newline;
138 };
139 
140 /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
141 struct ocfs2_control_message_down {
142         char    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
143         char    space1;
144         char    uuid[OCFS2_TEXT_UUID_LEN];
145         char    space2;
146         char    nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
147         char    newline;
148 };
149 
150 union ocfs2_control_message {
151         char                                    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
152         struct ocfs2_control_message_setn       u_setn;
153         struct ocfs2_control_message_setv       u_setv;
154         struct ocfs2_control_message_down       u_down;
155 };
156 
157 static struct ocfs2_stack_plugin ocfs2_user_plugin;
158 
159 static atomic_t ocfs2_control_opened;
160 static int ocfs2_control_this_node = -1;
161 static struct ocfs2_protocol_version running_proto;
162 
163 static LIST_HEAD(ocfs2_live_connection_list);
164 static LIST_HEAD(ocfs2_control_private_list);
165 static DEFINE_MUTEX(ocfs2_control_lock);
166 
167 static inline void ocfs2_control_set_handshake_state(struct file *file,
168                                                      int state)
169 {
170         struct ocfs2_control_private *p = file->private_data;
171         p->op_state = state;
172 }
173 
174 static inline int ocfs2_control_get_handshake_state(struct file *file)
175 {
176         struct ocfs2_control_private *p = file->private_data;
177         return p->op_state;
178 }
179 
180 static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
181 {
182         size_t len = strlen(name);
183         struct ocfs2_live_connection *c;
184 
185         BUG_ON(!mutex_is_locked(&ocfs2_control_lock));
186 
187         list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) {
188                 if ((c->oc_conn->cc_namelen == len) &&
189                     !strncmp(c->oc_conn->cc_name, name, len))
190                         return c;
191         }
192 
193         return NULL;
194 }
195 
196 /*
197  * ocfs2_live_connection structures are created underneath the ocfs2
198  * mount path.  Since the VFS prevents multiple calls to
199  * fill_super(), we can't get dupes here.
200  */
201 static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
202                                      struct ocfs2_live_connection **c_ret)
203 {
204         int rc = 0;
205         struct ocfs2_live_connection *c;
206 
207         c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
208         if (!c)
209                 return -ENOMEM;
210 
211         mutex_lock(&ocfs2_control_lock);
212         c->oc_conn = conn;
213 
214         if (atomic_read(&ocfs2_control_opened))
215                 list_add(&c->oc_list, &ocfs2_live_connection_list);
216         else {
217                 printk(KERN_ERR
218                        "ocfs2: Userspace control daemon is not present\n");
219                 rc = -ESRCH;
220         }
221 
222         mutex_unlock(&ocfs2_control_lock);
223 
224         if (!rc)
225                 *c_ret = c;
226         else
227                 kfree(c);
228 
229         return rc;
230 }
231 
232 /*
233  * This function disconnects the cluster connection from ocfs2_control.
234  * Afterwards, userspace can't affect the cluster connection.
235  */
236 static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c)
237 {
238         mutex_lock(&ocfs2_control_lock);
239         list_del_init(&c->oc_list);
240         c->oc_conn = NULL;
241         mutex_unlock(&ocfs2_control_lock);
242 
243         kfree(c);
244 }
245 
246 static int ocfs2_control_cfu(void *target, size_t target_len,
247                              const char __user *buf, size_t count)
248 {
249         /* The T01 expects write(2) calls to have exactly one command */
250         if ((count != target_len) ||
251             (count > sizeof(union ocfs2_control_message)))
252                 return -EINVAL;
253 
254         if (copy_from_user(target, buf, target_len))
255                 return -EFAULT;
256 
257         return 0;
258 }
259 
260 static ssize_t ocfs2_control_validate_protocol(struct file *file,
261                                                const char __user *buf,
262                                                size_t count)
263 {
264         ssize_t ret;
265         char kbuf[OCFS2_CONTROL_PROTO_LEN];
266 
267         ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN,
268                                 buf, count);
269         if (ret)
270                 return ret;
271 
272         if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN))
273                 return -EINVAL;
274 
275         ocfs2_control_set_handshake_state(file,
276                                           OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
277 
278         return count;
279 }
280 
281 static void ocfs2_control_send_down(const char *uuid,
282                                     int nodenum)
283 {
284         struct ocfs2_live_connection *c;
285 
286         mutex_lock(&ocfs2_control_lock);
287 
288         c = ocfs2_connection_find(uuid);
289         if (c) {
290                 BUG_ON(c->oc_conn == NULL);
291                 c->oc_conn->cc_recovery_handler(nodenum,
292                                                 c->oc_conn->cc_recovery_data);
293         }
294 
295         mutex_unlock(&ocfs2_control_lock);
296 }
297 
298 /*
299  * Called whenever configuration elements are sent to /dev/ocfs2_control.
300  * If all configuration elements are present, try to set the global
301  * values.  If there is a problem, return an error.  Skip any missing
302  * elements, and only bump ocfs2_control_opened when we have all elements
303  * and are successful.
304  */
305 static int ocfs2_control_install_private(struct file *file)
306 {
307         int rc = 0;
308         int set_p = 1;
309         struct ocfs2_control_private *p = file->private_data;
310 
311         BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
312 
313         mutex_lock(&ocfs2_control_lock);
314 
315         if (p->op_this_node < 0) {
316                 set_p = 0;
317         } else if ((ocfs2_control_this_node >= 0) &&
318                    (ocfs2_control_this_node != p->op_this_node)) {
319                 rc = -EINVAL;
320                 goto out_unlock;
321         }
322 
323         if (!p->op_proto.pv_major) {
324                 set_p = 0;
325         } else if (!list_empty(&ocfs2_live_connection_list) &&
326                    ((running_proto.pv_major != p->op_proto.pv_major) ||
327                     (running_proto.pv_minor != p->op_proto.pv_minor))) {
328                 rc = -EINVAL;
329                 goto out_unlock;
330         }
331 
332         if (set_p) {
333                 ocfs2_control_this_node = p->op_this_node;
334                 running_proto.pv_major = p->op_proto.pv_major;
335                 running_proto.pv_minor = p->op_proto.pv_minor;
336         }
337 
338 out_unlock:
339         mutex_unlock(&ocfs2_control_lock);
340 
341         if (!rc && set_p) {
342                 /* We set the global values successfully */
343                 atomic_inc(&ocfs2_control_opened);
344                 ocfs2_control_set_handshake_state(file,
345                                         OCFS2_CONTROL_HANDSHAKE_VALID);
346         }
347 
348         return rc;
349 }
350 
351 static int ocfs2_control_get_this_node(void)
352 {
353         int rc;
354 
355         mutex_lock(&ocfs2_control_lock);
356         if (ocfs2_control_this_node < 0)
357                 rc = -EINVAL;
358         else
359                 rc = ocfs2_control_this_node;
360         mutex_unlock(&ocfs2_control_lock);
361 
362         return rc;
363 }
364 
365 static int ocfs2_control_do_setnode_msg(struct file *file,
366                                         struct ocfs2_control_message_setn *msg)
367 {
368         long nodenum;
369         char *ptr = NULL;
370         struct ocfs2_control_private *p = file->private_data;
371 
372         if (ocfs2_control_get_handshake_state(file) !=
373             OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
374                 return -EINVAL;
375 
376         if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
377                     OCFS2_CONTROL_MESSAGE_OP_LEN))
378                 return -EINVAL;
379 
380         if ((msg->space != ' ') || (msg->newline != '\n'))
381                 return -EINVAL;
382         msg->space = msg->newline = '\0';
383 
384         nodenum = simple_strtol(msg->nodestr, &ptr, 16);
385         if (!ptr || *ptr)
386                 return -EINVAL;
387 
388         if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
389             (nodenum > INT_MAX) || (nodenum < 0))
390                 return -ERANGE;
391         p->op_this_node = nodenum;
392 
393         return ocfs2_control_install_private(file);
394 }
395 
396 static int ocfs2_control_do_setversion_msg(struct file *file,
397                                            struct ocfs2_control_message_setv *msg)
398  {
399         long major, minor;
400         char *ptr = NULL;
401         struct ocfs2_control_private *p = file->private_data;
402         struct ocfs2_protocol_version *max =
403                 &ocfs2_user_plugin.sp_max_proto;
404 
405         if (ocfs2_control_get_handshake_state(file) !=
406             OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
407                 return -EINVAL;
408 
409         if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
410                     OCFS2_CONTROL_MESSAGE_OP_LEN))
411                 return -EINVAL;
412 
413         if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
414             (msg->newline != '\n'))
415                 return -EINVAL;
416         msg->space1 = msg->space2 = msg->newline = '\0';
417 
418         major = simple_strtol(msg->major, &ptr, 16);
419         if (!ptr || *ptr)
420                 return -EINVAL;
421         minor = simple_strtol(msg->minor, &ptr, 16);
422         if (!ptr || *ptr)
423                 return -EINVAL;
424 
425         /*
426          * The major must be between 1 and 255, inclusive.  The minor
427          * must be between 0 and 255, inclusive.  The version passed in
428          * must be within the maximum version supported by the filesystem.
429          */
430         if ((major == LONG_MIN) || (major == LONG_MAX) ||
431             (major > (u8)-1) || (major < 1))
432                 return -ERANGE;
433         if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
434             (minor > (u8)-1) || (minor < 0))
435                 return -ERANGE;
436         if ((major != max->pv_major) ||
437             (minor > max->pv_minor))
438                 return -EINVAL;
439 
440         p->op_proto.pv_major = major;
441         p->op_proto.pv_minor = minor;
442 
443         return ocfs2_control_install_private(file);
444 }
445 
446 static int ocfs2_control_do_down_msg(struct file *file,
447                                      struct ocfs2_control_message_down *msg)
448 {
449         long nodenum;
450         char *p = NULL;
451 
452         if (ocfs2_control_get_handshake_state(file) !=
453             OCFS2_CONTROL_HANDSHAKE_VALID)
454                 return -EINVAL;
455 
456         if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
457                     OCFS2_CONTROL_MESSAGE_OP_LEN))
458                 return -EINVAL;
459 
460         if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
461             (msg->newline != '\n'))
462                 return -EINVAL;
463         msg->space1 = msg->space2 = msg->newline = '\0';
464 
465         nodenum = simple_strtol(msg->nodestr, &p, 16);
466         if (!p || *p)
467                 return -EINVAL;
468 
469         if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
470             (nodenum > INT_MAX) || (nodenum < 0))
471                 return -ERANGE;
472 
473         ocfs2_control_send_down(msg->uuid, nodenum);
474 
475         return 0;
476 }
477 
478 static ssize_t ocfs2_control_message(struct file *file,
479                                      const char __user *buf,
480                                      size_t count)
481 {
482         ssize_t ret;
483         union ocfs2_control_message msg;
484 
485         /* Try to catch padding issues */
486         WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
487                 (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));
488 
489         memset(&msg, 0, sizeof(union ocfs2_control_message));
490         ret = ocfs2_control_cfu(&msg, count, buf, count);
491         if (ret)
492                 goto out;
493 
494         if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
495             !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
496                      OCFS2_CONTROL_MESSAGE_OP_LEN))
497                 ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
498         else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
499                  !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
500                           OCFS2_CONTROL_MESSAGE_OP_LEN))
501                 ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
502         else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
503                  !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
504                           OCFS2_CONTROL_MESSAGE_OP_LEN))
505                 ret = ocfs2_control_do_down_msg(file, &msg.u_down);
506         else
507                 ret = -EINVAL;
508 
509 out:
510         return ret ? ret : count;
511 }
512 
513 static ssize_t ocfs2_control_write(struct file *file,
514                                    const char __user *buf,
515                                    size_t count,
516                                    loff_t *ppos)
517 {
518         ssize_t ret;
519 
520         switch (ocfs2_control_get_handshake_state(file)) {
521                 case OCFS2_CONTROL_HANDSHAKE_INVALID:
522                         ret = -EINVAL;
523                         break;
524 
525                 case OCFS2_CONTROL_HANDSHAKE_READ:
526                         ret = ocfs2_control_validate_protocol(file, buf,
527                                                               count);
528                         break;
529 
530                 case OCFS2_CONTROL_HANDSHAKE_PROTOCOL:
531                 case OCFS2_CONTROL_HANDSHAKE_VALID:
532                         ret = ocfs2_control_message(file, buf, count);
533                         break;
534 
535                 default:
536                         BUG();
537                         ret = -EIO;
538                         break;
539         }
540 
541         return ret;
542 }
543 
544 /*
545  * This is a naive version.  If we ever have a new protocol, we'll expand
546  * it.  Probably using seq_file.
547  */
548 static ssize_t ocfs2_control_read(struct file *file,
549                                   char __user *buf,
550                                   size_t count,
551                                   loff_t *ppos)
552 {
553         ssize_t ret;
554 
555         ret = simple_read_from_buffer(buf, count, ppos,
556                         OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN);
557 
558         /* Have we read the whole protocol list? */
559         if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN)
560                 ocfs2_control_set_handshake_state(file,
561                                                   OCFS2_CONTROL_HANDSHAKE_READ);
562 
563         return ret;
564 }
565 
566 static int ocfs2_control_release(struct inode *inode, struct file *file)
567 {
568         struct ocfs2_control_private *p = file->private_data;
569 
570         mutex_lock(&ocfs2_control_lock);
571 
572         if (ocfs2_control_get_handshake_state(file) !=
573             OCFS2_CONTROL_HANDSHAKE_VALID)
574                 goto out;
575 
576         if (atomic_dec_and_test(&ocfs2_control_opened)) {
577                 if (!list_empty(&ocfs2_live_connection_list)) {
578                         /* XXX: Do bad things! */
579                         printk(KERN_ERR
580                                "ocfs2: Unexpected release of ocfs2_control!\n"
581                                "       Loss of cluster connection requires "
582                                "an emergency restart!\n");
583                         emergency_restart();
584                 }
585                 /*
586                  * Last valid close clears the node number and resets
587                  * the locking protocol version
588                  */
589                 ocfs2_control_this_node = -1;
590                 running_proto.pv_major = 0;
591                 running_proto.pv_major = 0;
592         }
593 
594 out:
595         list_del_init(&p->op_list);
596         file->private_data = NULL;
597 
598         mutex_unlock(&ocfs2_control_lock);
599 
600         kfree(p);
601 
602         return 0;
603 }
604 
605 static int ocfs2_control_open(struct inode *inode, struct file *file)
606 {
607         struct ocfs2_control_private *p;
608 
609         p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL);
610         if (!p)
611                 return -ENOMEM;
612         p->op_this_node = -1;
613 
614         mutex_lock(&ocfs2_control_lock);
615         file->private_data = p;
616         list_add(&p->op_list, &ocfs2_control_private_list);
617         mutex_unlock(&ocfs2_control_lock);
618 
619         return 0;
620 }
621 
622 static const struct file_operations ocfs2_control_fops = {
623         .open    = ocfs2_control_open,
624         .release = ocfs2_control_release,
625         .read    = ocfs2_control_read,
626         .write   = ocfs2_control_write,
627         .owner   = THIS_MODULE,
628         .llseek  = default_llseek,
629 };
630 
631 static struct miscdevice ocfs2_control_device = {
632         .minor          = MISC_DYNAMIC_MINOR,
633         .name           = "ocfs2_control",
634         .fops           = &ocfs2_control_fops,
635 };
636 
637 static int ocfs2_control_init(void)
638 {
639         int rc;
640 
641         atomic_set(&ocfs2_control_opened, 0);
642 
643         rc = misc_register(&ocfs2_control_device);
644         if (rc)
645                 printk(KERN_ERR
646                        "ocfs2: Unable to register ocfs2_control device "
647                        "(errno %d)\n",
648                        -rc);
649 
650         return rc;
651 }
652 
653 static void ocfs2_control_exit(void)
654 {
655         int rc;
656 
657         rc = misc_deregister(&ocfs2_control_device);
658         if (rc)
659                 printk(KERN_ERR
660                        "ocfs2: Unable to deregister ocfs2_control device "
661                        "(errno %d)\n",
662                        -rc);
663 }
664 
665 static void fsdlm_lock_ast_wrapper(void *astarg)
666 {
667         struct ocfs2_dlm_lksb *lksb = astarg;
668         int status = lksb->lksb_fsdlm.sb_status;
669 
670         /*
671          * For now we're punting on the issue of other non-standard errors
672          * where we can't tell if the unlock_ast or lock_ast should be called.
673          * The main "other error" that's possible is EINVAL which means the
674          * function was called with invalid args, which shouldn't be possible
675          * since the caller here is under our control.  Other non-standard
676          * errors probably fall into the same category, or otherwise are fatal
677          * which means we can't carry on anyway.
678          */
679 
680         if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
681                 lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0);
682         else
683                 lksb->lksb_conn->cc_proto->lp_lock_ast(lksb);
684 }
685 
686 static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
687 {
688         struct ocfs2_dlm_lksb *lksb = astarg;
689 
690         lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level);
691 }
692 
693 static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
694                          int mode,
695                          struct ocfs2_dlm_lksb *lksb,
696                          u32 flags,
697                          void *name,
698                          unsigned int namelen)
699 {
700         int ret;
701 
702         if (!lksb->lksb_fsdlm.sb_lvbptr)
703                 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
704                                              sizeof(struct dlm_lksb);
705 
706         ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
707                        flags|DLM_LKF_NODLCKWT, name, namelen, 0,
708                        fsdlm_lock_ast_wrapper, lksb,
709                        fsdlm_blocking_ast_wrapper);
710         return ret;
711 }
712 
713 static int user_dlm_unlock(struct ocfs2_cluster_connection *conn,
714                            struct ocfs2_dlm_lksb *lksb,
715                            u32 flags)
716 {
717         int ret;
718 
719         ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
720                          flags, &lksb->lksb_fsdlm, lksb);
721         return ret;
722 }
723 
724 static int user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb)
725 {
726         return lksb->lksb_fsdlm.sb_status;
727 }
728 
729 static int user_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb)
730 {
731         int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
732 
733         return !invalid;
734 }
735 
736 static void *user_dlm_lvb(struct ocfs2_dlm_lksb *lksb)
737 {
738         if (!lksb->lksb_fsdlm.sb_lvbptr)
739                 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
740                                              sizeof(struct dlm_lksb);
741         return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
742 }
743 
/* dump_lksb hook for the user stack; intentionally does nothing. */
static void user_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb)
{
	/* Nothing to dump. */
}
747 
748 static int user_plock(struct ocfs2_cluster_connection *conn,
749                       u64 ino,
750                       struct file *file,
751                       int cmd,
752                       struct file_lock *fl)
753 {
754         /*
755          * This more or less just demuxes the plock request into any
756          * one of three dlm calls.
757          *
758          * Internally, fs/dlm will pass these to a misc device, which
759          * a userspace daemon will read and write to.
760          *
761          * For now, cancel requests (which happen internally only),
762          * are turned into unlocks. Most of this function taken from
763          * gfs2_lock.
764          */
765 
766         if (cmd == F_CANCELLK) {
767                 cmd = F_SETLK;
768                 fl->fl_type = F_UNLCK;
769         }
770 
771         if (IS_GETLK(cmd))
772                 return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
773         else if (fl->fl_type == F_UNLCK)
774                 return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
775         else
776                 return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
777 }
778 
779 /*
780  * Compare a requested locking protocol version against the current one.
781  *
782  * If the major numbers are different, they are incompatible.
783  * If the current minor is greater than the request, they are incompatible.
784  * If the current minor is less than or equal to the request, they are
785  * compatible, and the requester should run at the current minor version.
786  */
787 static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
788                                struct ocfs2_protocol_version *request)
789 {
790         if (existing->pv_major != request->pv_major)
791                 return 1;
792 
793         if (existing->pv_minor > request->pv_minor)
794                 return 1;
795 
796         if (existing->pv_minor < request->pv_minor)
797                 request->pv_minor = existing->pv_minor;
798 
799         return 0;
800 }
801 
802 static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
803 {
804         dlm_lockspace_t *fsdlm;
805         struct ocfs2_live_connection *uninitialized_var(control);
806         int rc = 0;
807 
808         BUG_ON(conn == NULL);
809 
810         rc = ocfs2_live_connection_new(conn, &control);
811         if (rc)
812                 goto out;
813 
814         /*
815          * running_proto must have been set before we allowed any mounts
816          * to proceed.
817          */
818         if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
819                 printk(KERN_ERR
820                        "Unable to mount with fs locking protocol version "
821                        "%u.%u because the userspace control daemon has "
822                        "negotiated %u.%u\n",
823                        conn->cc_version.pv_major, conn->cc_version.pv_minor,
824                        running_proto.pv_major, running_proto.pv_minor);
825                 rc = -EPROTO;
826                 ocfs2_live_connection_drop(control);
827                 goto out;
828         }
829 
830         rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
831                                NULL, NULL, NULL, &fsdlm);
832         if (rc) {
833                 ocfs2_live_connection_drop(control);
834                 goto out;
835         }
836 
837         conn->cc_private = control;
838         conn->cc_lockspace = fsdlm;
839 out:
840         return rc;
841 }
842 
843 static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
844 {
845         dlm_release_lockspace(conn->cc_lockspace, 2);
846         conn->cc_lockspace = NULL;
847         ocfs2_live_connection_drop(conn->cc_private);
848         conn->cc_private = NULL;
849         return 0;
850 }
851 
/*
 * Store the local node number in *@this_node.
 *
 * Returns 0 on success, or the negative errno from
 * ocfs2_control_get_this_node() (in which case *@this_node is untouched).
 */
static int user_cluster_this_node(unsigned int *this_node)
{
	int node = ocfs2_control_get_this_node();

	if (node < 0)
		return node;

	*this_node = node;
	return 0;
}
863 
864 static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
865         .connect        = user_cluster_connect,
866         .disconnect     = user_cluster_disconnect,
867         .this_node      = user_cluster_this_node,
868         .dlm_lock       = user_dlm_lock,
869         .dlm_unlock     = user_dlm_unlock,
870         .lock_status    = user_dlm_lock_status,
871         .lvb_valid      = user_dlm_lvb_valid,
872         .lock_lvb       = user_dlm_lvb,
873         .plock          = user_plock,
874         .dump_lksb      = user_dlm_dump_lksb,
875 };
876 
877 static struct ocfs2_stack_plugin ocfs2_user_plugin = {
878         .sp_name        = "user",
879         .sp_ops         = &ocfs2_user_plugin_ops,
880         .sp_owner       = THIS_MODULE,
881 };
882 
883 
884 static int __init ocfs2_user_plugin_init(void)
885 {
886         int rc;
887 
888         rc = ocfs2_control_init();
889         if (!rc) {
890                 rc = ocfs2_stack_glue_register(&ocfs2_user_plugin);
891                 if (rc)
892                         ocfs2_control_exit();
893         }
894 
895         return rc;
896 }
897 
898 static void __exit ocfs2_user_plugin_exit(void)
899 {
900         ocfs2_stack_glue_unregister(&ocfs2_user_plugin);
901         ocfs2_control_exit();
902 }
903 
904 MODULE_AUTHOR("Oracle");
905 MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
906 MODULE_LICENSE("GPL");
907 module_init(ocfs2_user_plugin_init);
908 module_exit(ocfs2_user_plugin_exit);
909 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp