TOMOYO Linux Cross Reference
Linux/net/smc/af_smc.c


  1 /*
  2  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  3  *
  4  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
  5  *  applies to SOCK_STREAM sockets only
  6  *  offers an alternative communication option for TCP-protocol sockets
  7  *  applicable with RoCE-cards only
  8  *
  9  *  Initial restrictions:
 10  *    - support for alternate links postponed
 11  *    - partial support for non-blocking sockets only
 12  *    - support for urgent data postponed
 13  *
 14  *  Copyright IBM Corp. 2016, 2018
 15  *
 16  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 17  *              based on prototype from Frank Blaschka
 18  */
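/*
 * Illustrative userspace usage (an editorial sketch, not part of the
 * original source): an application opts in to SMC explicitly by creating
 * its stream socket with the SMC address family, for example
 *
 *	sd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *
 * (or SMCPROTO_SMC6 for IPv6). bind(), connect(), listen() and accept()
 * then behave as for TCP; if the peer turns out not to be SMC-capable,
 * the connection transparently falls back to the internal TCP "clcsock"
 * (see smc->use_fallback below).
 */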
 19 
 20 #define KMSG_COMPONENT "smc"
 21 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 22 
 23 #include <linux/module.h>
 24 #include <linux/socket.h>
 25 #include <linux/workqueue.h>
 26 #include <linux/in.h>
 27 #include <linux/sched/signal.h>
 28 
 29 #include <net/sock.h>
 30 #include <net/tcp.h>
 31 #include <net/smc.h>
 32 
 33 #include "smc.h"
 34 #include "smc_clc.h"
 35 #include "smc_llc.h"
 36 #include "smc_cdc.h"
 37 #include "smc_core.h"
 38 #include "smc_ib.h"
 39 #include "smc_pnet.h"
 40 #include "smc_tx.h"
 41 #include "smc_rx.h"
 42 #include "smc_close.h"
 43 
 44 static DEFINE_MUTEX(smc_create_lgr_pending);    /* serialize link group
 45                                                  * creation
 46                                                  */
 47 
 48 struct smc_lgr_list smc_lgr_list = {            /* established link groups */
 49         .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
 50         .list = LIST_HEAD_INIT(smc_lgr_list.list),
 51 };
 52 
 53 static void smc_tcp_listen_work(struct work_struct *);
 54 
 55 static void smc_set_keepalive(struct sock *sk, int val)
 56 {
 57         struct smc_sock *smc = smc_sk(sk);
 58 
 59         smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
 60 }
 61 
 62 static struct smc_hashinfo smc_v4_hashinfo = {
 63         .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
 64 };
 65 
 66 static struct smc_hashinfo smc_v6_hashinfo = {
 67         .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
 68 };
 69 
 70 int smc_hash_sk(struct sock *sk)
 71 {
 72         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
 73         struct hlist_head *head;
 74 
 75         head = &h->ht;
 76 
 77         write_lock_bh(&h->lock);
 78         sk_add_node(sk, head);
 79         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 80         write_unlock_bh(&h->lock);
 81 
 82         return 0;
 83 }
 84 EXPORT_SYMBOL_GPL(smc_hash_sk);
 85 
 86 void smc_unhash_sk(struct sock *sk)
 87 {
 88         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
 89 
 90         write_lock_bh(&h->lock);
 91         if (sk_del_node_init(sk))
 92                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 93         write_unlock_bh(&h->lock);
 94 }
 95 EXPORT_SYMBOL_GPL(smc_unhash_sk);
 96 
 97 struct proto smc_proto = {
 98         .name           = "SMC",
 99         .owner          = THIS_MODULE,
100         .keepalive      = smc_set_keepalive,
101         .hash           = smc_hash_sk,
102         .unhash         = smc_unhash_sk,
103         .obj_size       = sizeof(struct smc_sock),
104         .h.smc_hash     = &smc_v4_hashinfo,
105         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
106 };
107 EXPORT_SYMBOL_GPL(smc_proto);
108 
109 struct proto smc_proto6 = {
110         .name           = "SMC6",
111         .owner          = THIS_MODULE,
112         .keepalive      = smc_set_keepalive,
113         .hash           = smc_hash_sk,
114         .unhash         = smc_unhash_sk,
115         .obj_size       = sizeof(struct smc_sock),
116         .h.smc_hash     = &smc_v6_hashinfo,
117         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
118 };
119 EXPORT_SYMBOL_GPL(smc_proto6);
120 
121 static int smc_release(struct socket *sock)
122 {
123         struct sock *sk = sock->sk;
124         struct smc_sock *smc;
125         int rc = 0;
126 
127         if (!sk)
128                 goto out;
129 
130         smc = smc_sk(sk);
131         if (sk->sk_state == SMC_LISTEN)
132                 /* smc_close_non_accepted() is called and acquires
133                  * sock lock for child sockets again
134                  */
135                 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
136         else
137                 lock_sock(sk);
138 
139         if (!smc->use_fallback) {
140                 rc = smc_close_active(smc);
141                 sock_set_flag(sk, SOCK_DEAD);
142                 sk->sk_shutdown |= SHUTDOWN_MASK;
143         }
144         if (smc->clcsock) {
145                 sock_release(smc->clcsock);
146                 smc->clcsock = NULL;
147         }
148         if (smc->use_fallback) {
149                 sock_put(sk); /* passive closing */
150                 sk->sk_state = SMC_CLOSED;
151                 sk->sk_state_change(sk);
152         }
153 
154         /* detach socket */
155         sock_orphan(sk);
156         sock->sk = NULL;
157         if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
158                 smc_conn_free(&smc->conn);
159         release_sock(sk);
160 
161         sk->sk_prot->unhash(sk);
162         sock_put(sk); /* final sock_put */
163 out:
164         return rc;
165 }
166 
167 static void smc_destruct(struct sock *sk)
168 {
169         if (sk->sk_state != SMC_CLOSED)
170                 return;
171         if (!sock_flag(sk, SOCK_DEAD))
172                 return;
173 
174         sk_refcnt_debug_dec(sk);
175 }
176 
177 static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
178                                    int protocol)
179 {
180         struct smc_sock *smc;
181         struct proto *prot;
182         struct sock *sk;
183 
184         prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
185         sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
186         if (!sk)
187                 return NULL;
188 
189         sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
190         sk->sk_state = SMC_INIT;
191         sk->sk_destruct = smc_destruct;
192         sk->sk_protocol = protocol;
193         smc = smc_sk(sk);
194         INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
195         INIT_LIST_HEAD(&smc->accept_q);
196         spin_lock_init(&smc->accept_q_lock);
197         sk->sk_prot->hash(sk);
198         sk_refcnt_debug_inc(sk);
199 
200         return sk;
201 }
202 
203 static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
204                     int addr_len)
205 {
206         struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
207         struct sock *sk = sock->sk;
208         struct smc_sock *smc;
209         int rc;
210 
211         smc = smc_sk(sk);
212 
213         /* replicate tests from inet_bind(), to be safe wrt. future changes */
214         rc = -EINVAL;
215         if (addr_len < sizeof(struct sockaddr_in))
216                 goto out;
217 
218         rc = -EAFNOSUPPORT;
219         if (addr->sin_family != AF_INET &&
220             addr->sin_family != AF_INET6 &&
221             addr->sin_family != AF_UNSPEC)
222                 goto out;
223         /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
224         if (addr->sin_family == AF_UNSPEC &&
225             addr->sin_addr.s_addr != htonl(INADDR_ANY))
226                 goto out;
227 
228         lock_sock(sk);
229 
230         /* Check if socket is already active */
231         rc = -EINVAL;
232         if (sk->sk_state != SMC_INIT)
233                 goto out_rel;
234 
235         smc->clcsock->sk->sk_reuse = sk->sk_reuse;
236         rc = kernel_bind(smc->clcsock, uaddr, addr_len);
237 
238 out_rel:
239         release_sock(sk);
240 out:
241         return rc;
242 }
243 
244 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
245                                    unsigned long mask)
246 {
247         /* options we do not get control of via setsockopt */
248         nsk->sk_type = osk->sk_type;
249         nsk->sk_sndbuf = osk->sk_sndbuf;
250         nsk->sk_rcvbuf = osk->sk_rcvbuf;
251         nsk->sk_sndtimeo = osk->sk_sndtimeo;
252         nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
253         nsk->sk_mark = osk->sk_mark;
254         nsk->sk_priority = osk->sk_priority;
255         nsk->sk_rcvlowat = osk->sk_rcvlowat;
256         nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
257         nsk->sk_err = osk->sk_err;
258 
259         nsk->sk_flags &= ~mask;
260         nsk->sk_flags |= osk->sk_flags & mask;
261 }
262 
263 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
264                              (1UL << SOCK_KEEPOPEN) | \
265                              (1UL << SOCK_LINGER) | \
266                              (1UL << SOCK_BROADCAST) | \
267                              (1UL << SOCK_TIMESTAMP) | \
268                              (1UL << SOCK_DBG) | \
269                              (1UL << SOCK_RCVTSTAMP) | \
270                              (1UL << SOCK_RCVTSTAMPNS) | \
271                              (1UL << SOCK_LOCALROUTE) | \
272                              (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
273                              (1UL << SOCK_RXQ_OVFL) | \
274                              (1UL << SOCK_WIFI_STATUS) | \
275                              (1UL << SOCK_NOFCS) | \
276                              (1UL << SOCK_FILTER_LOCKED))
277 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
278  * clc socket (since smc is not called for these options from net/core)
279  */
280 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
281 {
282         smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
283 }
284 
285 #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
286                              (1UL << SOCK_KEEPOPEN) | \
287                              (1UL << SOCK_LINGER) | \
288                              (1UL << SOCK_DBG))
289 /* copy only settings and flags relevant for smc from clc to smc socket */
290 static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
291 {
292         smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
293 }
294 
295 /* register a new rmb */
296 static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
297 {
298         /* register memory region for new rmb */
299         if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
300                 rmb_desc->regerr = 1;
301                 return -EFAULT;
302         }
303         return 0;
304 }
305 
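/* Editorial note (summary of the function below): on first contact the
 * client must confirm the newly created RoCE link with the server via
 * LLC messages before it carries data: it waits for the CONFIRM LINK
 * request, answers it, and rejects the subsequent ADD LINK request,
 * since only a single link per link group is supported so far. Failures
 * are mapped to SMC_CLC_DECL_* reason codes handled by the caller.
 */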
306 static int smc_clnt_conf_first_link(struct smc_sock *smc)
307 {
308         struct smc_link_group *lgr = smc->conn.lgr;
309         struct smc_link *link;
310         int rest;
311         int rc;
312 
313         link = &lgr->lnk[SMC_SINGLE_LINK];
314         /* receive CONFIRM LINK request from server over RoCE fabric */
315         rest = wait_for_completion_interruptible_timeout(
316                 &link->llc_confirm,
317                 SMC_LLC_WAIT_FIRST_TIME);
318         if (rest <= 0) {
319                 struct smc_clc_msg_decline dclc;
320 
321                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
322                                       SMC_CLC_DECLINE);
323                 return rc;
324         }
325 
326         if (link->llc_confirm_rc)
327                 return SMC_CLC_DECL_RMBE_EC;
328 
329         rc = smc_ib_modify_qp_rts(link);
330         if (rc)
331                 return SMC_CLC_DECL_INTERR;
332 
333         smc_wr_remember_qp_attr(link);
334 
335         if (smc_reg_rmb(link, smc->conn.rmb_desc))
336                 return SMC_CLC_DECL_INTERR;
337 
338         /* send CONFIRM LINK response over RoCE fabric */
339         rc = smc_llc_send_confirm_link(link,
340                                        link->smcibdev->mac[link->ibport - 1],
341                                        &link->smcibdev->gid[link->ibport - 1],
342                                        SMC_LLC_RESP);
343         if (rc < 0)
344                 return SMC_CLC_DECL_TCL;
345 
346         /* receive ADD LINK request from server over RoCE fabric */
347         rest = wait_for_completion_interruptible_timeout(&link->llc_add,
348                                                          SMC_LLC_WAIT_TIME);
349         if (rest <= 0) {
350                 struct smc_clc_msg_decline dclc;
351 
352                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
353                                       SMC_CLC_DECLINE);
354                 return rc;
355         }
356 
357         /* send add link reject message, only one link supported for now */
358         rc = smc_llc_send_add_link(link,
359                                    link->smcibdev->mac[link->ibport - 1],
360                                    &link->smcibdev->gid[link->ibport - 1],
361                                    SMC_LLC_RESP);
362         if (rc < 0)
363                 return SMC_CLC_DECL_TCL;
364 
365         link->state = SMC_LNK_ACTIVE;
366 
367         return 0;
368 }
369 
370 static void smc_conn_save_peer_info(struct smc_sock *smc,
371                                     struct smc_clc_msg_accept_confirm *clc)
372 {
373         smc->conn.peer_conn_idx = clc->conn_idx;
374         smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
375         smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
376         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
377 }
378 
379 static void smc_link_save_peer_info(struct smc_link *link,
380                                     struct smc_clc_msg_accept_confirm *clc)
381 {
382         link->peer_qpn = ntoh24(clc->qpn);
383         memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
384         memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
385         link->peer_psn = ntoh24(clc->psn);
386         link->peer_mtu = clc->qp_mtu;
387 }
388 
389 /* setup for RDMA connection of client */
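/* Editorial overview of the client-side handshake implemented below:
 * the CLC messages (proposal, accept, confirm, or decline) travel over
 * the internal TCP clcsock, while the LLC link confirmation runs over
 * the RoCE fabric. On any setup failure the code either declines SMC
 * and falls back to TCP (decline_rdma*) or reports an error (out_err*).
 */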
390 static int smc_connect_rdma(struct smc_sock *smc)
391 {
392         struct smc_clc_msg_accept_confirm aclc;
393         int local_contact = SMC_FIRST_CONTACT;
394         struct smc_ib_device *smcibdev;
395         struct smc_link *link;
396         u8 srv_first_contact;
397         int reason_code = 0;
398         int rc = 0;
399         u8 ibport;
400 
401         sock_hold(&smc->sk); /* sock put in passive closing */
402 
403         if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
404                 /* peer has not signalled SMC-capability */
405                 smc->use_fallback = true;
406                 goto out_connected;
407         }
408 
409         /* IPSec connections opt out of SMC-R optimizations */
410         if (using_ipsec(smc)) {
411                 reason_code = SMC_CLC_DECL_IPSEC;
412                 goto decline_rdma;
413         }
414 
415         /* PNET table look up: search active ib_device and port
416          * within same PNETID that also contains the ethernet device
417          * used for the internal TCP socket
418          */
419         smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
420         if (!smcibdev) {
421                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
422                 goto decline_rdma;
423         }
424 
425         /* do inband token exchange */
426         reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
427         if (reason_code < 0) {
428                 rc = reason_code;
429                 goto out_err;
430         }
431         if (reason_code > 0) /* configuration error */
432                 goto decline_rdma;
433         /* receive SMC Accept CLC message */
434         reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
435                                        SMC_CLC_ACCEPT);
436         if (reason_code < 0) {
437                 rc = reason_code;
438                 goto out_err;
439         }
440         if (reason_code > 0)
441                 goto decline_rdma;
442 
443         srv_first_contact = aclc.hdr.flag;
444         mutex_lock(&smc_create_lgr_pending);
445         local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
446                                         srv_first_contact);
447         if (local_contact < 0) {
448                 rc = local_contact;
449                 if (rc == -ENOMEM)
450                         reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
451                 else if (rc == -ENOLINK)
452                         reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
453                 goto decline_rdma_unlock;
454         }
455         link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
456 
457         smc_conn_save_peer_info(smc, &aclc);
458 
459         /* create send buffer and rmb */
460         rc = smc_buf_create(smc);
461         if (rc) {
462                 reason_code = SMC_CLC_DECL_MEM;
463                 goto decline_rdma_unlock;
464         }
465 
466         if (local_contact == SMC_FIRST_CONTACT)
467                 smc_link_save_peer_info(link, &aclc);
468 
469         rc = smc_rmb_rtoken_handling(&smc->conn, &aclc);
470         if (rc) {
471                 reason_code = SMC_CLC_DECL_INTERR;
472                 goto decline_rdma_unlock;
473         }
474 
475         smc_close_init(smc);
476         smc_rx_init(smc);
477 
478         if (local_contact == SMC_FIRST_CONTACT) {
479                 rc = smc_ib_ready_link(link);
480                 if (rc) {
481                         reason_code = SMC_CLC_DECL_INTERR;
482                         goto decline_rdma_unlock;
483                 }
484         } else {
485                 if (!smc->conn.rmb_desc->reused) {
486                         if (smc_reg_rmb(link, smc->conn.rmb_desc)) {
487                                 reason_code = SMC_CLC_DECL_INTERR;
488                                 goto decline_rdma_unlock;
489                         }
490                 }
491         }
492         smc_rmb_sync_sg_for_device(&smc->conn);
493 
494         rc = smc_clc_send_confirm(smc);
495         if (rc)
496                 goto out_err_unlock;
497 
498         if (local_contact == SMC_FIRST_CONTACT) {
499                 /* QP confirmation over RoCE fabric */
500                 reason_code = smc_clnt_conf_first_link(smc);
501                 if (reason_code < 0) {
502                         rc = reason_code;
503                         goto out_err_unlock;
504                 }
505                 if (reason_code > 0)
506                         goto decline_rdma_unlock;
507         }
508 
509         mutex_unlock(&smc_create_lgr_pending);
510         smc_tx_init(smc);
511 
512 out_connected:
513         smc_copy_sock_settings_to_clc(smc);
514         if (smc->sk.sk_state == SMC_INIT)
515                 smc->sk.sk_state = SMC_ACTIVE;
516 
517         return rc ? rc : local_contact;
518 
519 decline_rdma_unlock:
520         if (local_contact == SMC_FIRST_CONTACT)
521                 smc_lgr_forget(smc->conn.lgr);
522         mutex_unlock(&smc_create_lgr_pending);
523         smc_conn_free(&smc->conn);
524 decline_rdma:
525         /* RDMA setup failed, switch back to TCP */
526         smc->use_fallback = true;
527         if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
528                 rc = smc_clc_send_decline(smc, reason_code);
529                 if (rc < 0)
530                         goto out_err;
531         }
532         goto out_connected;
533 
534 out_err_unlock:
535         if (local_contact == SMC_FIRST_CONTACT)
536                 smc_lgr_forget(smc->conn.lgr);
537         mutex_unlock(&smc_create_lgr_pending);
538         smc_conn_free(&smc->conn);
539 out_err:
540         if (smc->sk.sk_state == SMC_INIT)
541                 sock_put(&smc->sk); /* passive closing */
542         return rc;
543 }
544 
545 static int smc_connect(struct socket *sock, struct sockaddr *addr,
546                        int alen, int flags)
547 {
548         struct sock *sk = sock->sk;
549         struct smc_sock *smc;
550         int rc = -EINVAL;
551 
552         smc = smc_sk(sk);
553 
554         /* separate smc parameter checking to be safe */
555         if (alen < sizeof(addr->sa_family))
556                 goto out_err;
557         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
558                 goto out_err;
559 
560         lock_sock(sk);
561         switch (sk->sk_state) {
562         default:
563                 goto out;
564         case SMC_ACTIVE:
565                 rc = -EISCONN;
566                 goto out;
567         case SMC_INIT:
568                 rc = 0;
569                 break;
570         }
571 
572         smc_copy_sock_settings_to_clc(smc);
573         tcp_sk(smc->clcsock->sk)->syn_smc = 1;
574         rc = kernel_connect(smc->clcsock, addr, alen, flags);
575         if (rc)
576                 goto out;
577 
578         /* setup RDMA connection */
579         rc = smc_connect_rdma(smc);
580         if (rc < 0)
581                 goto out;
582         else
583                 rc = 0; /* success cases including fallback */
584 
585 out:
586         release_sock(sk);
587 out_err:
588         return rc;
589 }
590 
591 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
592 {
593         struct socket *new_clcsock = NULL;
594         struct sock *lsk = &lsmc->sk;
595         struct sock *new_sk;
596         int rc;
597 
598         release_sock(lsk);
599         new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
600         if (!new_sk) {
601                 rc = -ENOMEM;
602                 lsk->sk_err = ENOMEM;
603                 *new_smc = NULL;
604                 lock_sock(lsk);
605                 goto out;
606         }
607         *new_smc = smc_sk(new_sk);
608 
609         rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
610         lock_sock(lsk);
611         if  (rc < 0)
612                 lsk->sk_err = -rc;
613         if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
614                 if (new_clcsock)
615                         sock_release(new_clcsock);
616                 new_sk->sk_state = SMC_CLOSED;
617                 sock_set_flag(new_sk, SOCK_DEAD);
618                 new_sk->sk_prot->unhash(new_sk);
619                 sock_put(new_sk); /* final */
620                 *new_smc = NULL;
621                 goto out;
622         }
623 
624         (*new_smc)->clcsock = new_clcsock;
625 out:
626         return rc;
627 }
628 
629 /* add a just created sock to the accept queue of the listen sock as
630  * candidate for a following socket accept call from user space
631  */
632 static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
633 {
634         struct smc_sock *par = smc_sk(parent);
635 
636         sock_hold(sk); /* sock_put in smc_accept_unlink() */
637         spin_lock(&par->accept_q_lock);
638         list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
639         spin_unlock(&par->accept_q_lock);
640         sk_acceptq_added(parent);
641 }
642 
643 /* remove a socket from the accept queue of its parental listening socket */
644 static void smc_accept_unlink(struct sock *sk)
645 {
646         struct smc_sock *par = smc_sk(sk)->listen_smc;
647 
648         spin_lock(&par->accept_q_lock);
649         list_del_init(&smc_sk(sk)->accept_q);
650         spin_unlock(&par->accept_q_lock);
651         sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
652         sock_put(sk); /* sock_hold in smc_accept_enqueue */
653 }
654 
655 /* remove a sock from the accept queue to bind it to a new socket created
656  * for a socket accept call from user space
657  */
658 struct sock *smc_accept_dequeue(struct sock *parent,
659                                 struct socket *new_sock)
660 {
661         struct smc_sock *isk, *n;
662         struct sock *new_sk;
663 
664         list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
665                 new_sk = (struct sock *)isk;
666 
667                 smc_accept_unlink(new_sk);
668                 if (new_sk->sk_state == SMC_CLOSED) {
669                         if (isk->clcsock) {
670                                 sock_release(isk->clcsock);
671                                 isk->clcsock = NULL;
672                         }
673                         new_sk->sk_prot->unhash(new_sk);
674                         sock_put(new_sk); /* final */
675                         continue;
676                 }
677                 if (new_sock)
678                         sock_graft(new_sk, new_sock);
679                 return new_sk;
680         }
681         return NULL;
682 }
683 
684 /* clean up for a created but never accepted sock */
685 void smc_close_non_accepted(struct sock *sk)
686 {
687         struct smc_sock *smc = smc_sk(sk);
688 
689         lock_sock(sk);
690         if (!sk->sk_lingertime)
691                 /* wait for peer closing */
692                 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
693         if (!smc->use_fallback) {
694                 smc_close_active(smc);
695                 sock_set_flag(sk, SOCK_DEAD);
696                 sk->sk_shutdown |= SHUTDOWN_MASK;
697         }
698         if (smc->clcsock) {
699                 struct socket *tcp;
700 
701                 tcp = smc->clcsock;
702                 smc->clcsock = NULL;
703                 sock_release(tcp);
704         }
705         if (smc->use_fallback) {
706                 sock_put(sk); /* passive closing */
707                 sk->sk_state = SMC_CLOSED;
708         } else {
709                 if (sk->sk_state == SMC_CLOSED)
710                         smc_conn_free(&smc->conn);
711         }
712         release_sock(sk);
713         sk->sk_prot->unhash(sk);
714         sock_put(sk); /* final sock_put */
715 }
716 
717 static int smc_serv_conf_first_link(struct smc_sock *smc)
718 {
719         struct smc_link_group *lgr = smc->conn.lgr;
720         struct smc_link *link;
721         int rest;
722         int rc;
723 
724         link = &lgr->lnk[SMC_SINGLE_LINK];
725 
726         if (smc_reg_rmb(link, smc->conn.rmb_desc))
727                 return SMC_CLC_DECL_INTERR;
728 
729         /* send CONFIRM LINK request to client over the RoCE fabric */
730         rc = smc_llc_send_confirm_link(link,
731                                        link->smcibdev->mac[link->ibport - 1],
732                                        &link->smcibdev->gid[link->ibport - 1],
733                                        SMC_LLC_REQ);
734         if (rc < 0)
735                 return SMC_CLC_DECL_TCL;
736 
737         /* receive CONFIRM LINK response from client over the RoCE fabric */
738         rest = wait_for_completion_interruptible_timeout(
739                 &link->llc_confirm_resp,
740                 SMC_LLC_WAIT_FIRST_TIME);
741         if (rest <= 0) {
742                 struct smc_clc_msg_decline dclc;
743 
744                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
745                                       SMC_CLC_DECLINE);
746                 return rc;
747         }
748 
749         if (link->llc_confirm_resp_rc)
750                 return SMC_CLC_DECL_RMBE_EC;
751 
752         /* send ADD LINK request to client over the RoCE fabric */
753         rc = smc_llc_send_add_link(link,
754                                    link->smcibdev->mac[link->ibport - 1],
755                                    &link->smcibdev->gid[link->ibport - 1],
756                                    SMC_LLC_REQ);
757         if (rc < 0)
758                 return SMC_CLC_DECL_TCL;
759 
760         /* receive ADD LINK response from client over the RoCE fabric */
761         rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
762                                                          SMC_LLC_WAIT_TIME);
763         if (rest <= 0) {
764                 struct smc_clc_msg_decline dclc;
765 
766                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
767                                       SMC_CLC_DECLINE);
768                 return rc;
769         }
770 
771         link->state = SMC_LNK_ACTIVE;
772 
773         return 0;
774 }
775 
776 /* setup for RDMA connection of server */
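/* Editorial overview of the server-side handshake implemented below:
 * for every accepted clcsock this worker waits for the CLC proposal,
 * checks SMC eligibility (IPSec, PNET table, prefix match), allocates
 * the connection and buffers, sends the CLC accept and waits for the
 * CLC confirm; on first contact it additionally confirms the link via
 * LLC. Whatever the outcome, the new sock normally ends up on the
 * listen socket's accept queue (possibly with use_fallback or sk_err
 * set).
 */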
777 static void smc_listen_work(struct work_struct *work)
778 {
779         struct smc_sock *new_smc = container_of(work, struct smc_sock,
780                                                 smc_listen_work);
781         struct smc_clc_msg_proposal_prefix *pclc_prfx;
782         struct socket *newclcsock = new_smc->clcsock;
783         struct smc_sock *lsmc = new_smc->listen_smc;
784         struct smc_clc_msg_accept_confirm cclc;
785         int local_contact = SMC_REUSE_CONTACT;
786         struct sock *newsmcsk = &new_smc->sk;
787         struct smc_clc_msg_proposal *pclc;
788         struct smc_ib_device *smcibdev;
789         u8 buf[SMC_CLC_MAX_LEN];
790         struct smc_link *link;
791         int reason_code = 0;
792         int rc = 0;
793         u8 ibport;
794 
795         /* check if peer is smc capable */
796         if (!tcp_sk(newclcsock->sk)->syn_smc) {
797                 new_smc->use_fallback = true;
798                 goto out_connected;
799         }
800 
801         /* do inband token exchange -
802          * wait for and receive SMC Proposal CLC message
803          */
804         reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
805                                        SMC_CLC_PROPOSAL);
806         if (reason_code < 0)
807                 goto out_err;
808         if (reason_code > 0)
809                 goto decline_rdma;
810 
811         /* IPSec connections opt out of SMC-R optimizations */
812         if (using_ipsec(new_smc)) {
813                 reason_code = SMC_CLC_DECL_IPSEC;
814                 goto decline_rdma;
815         }
816 
817         /* PNET table look up: search active ib_device and port
818          * within same PNETID that also contains the ethernet device
819          * used for the internal TCP socket
820          */
821         smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
822         if (!smcibdev) {
823                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
824                 goto decline_rdma;
825         }
826 
827         pclc = (struct smc_clc_msg_proposal *)&buf;
828         pclc_prfx = smc_clc_proposal_get_prefix(pclc);
829 
830         rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
831         if (rc) {
832                 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
833                 goto decline_rdma;
834         }
835 
836         /* allocate connection / link group */
837         mutex_lock(&smc_create_lgr_pending);
838         local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
839                                         0);
840         if (local_contact < 0) {
841                 rc = local_contact;
842                 if (rc == -ENOMEM)
843                         reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
844                 goto decline_rdma_unlock;
845         }
846         link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
847 
848         /* create send buffer and rmb */
849         rc = smc_buf_create(new_smc);
850         if (rc) {
851                 reason_code = SMC_CLC_DECL_MEM;
852                 goto decline_rdma_unlock;
853         }
854 
855         smc_close_init(new_smc);
856         smc_rx_init(new_smc);
857 
858         if (local_contact != SMC_FIRST_CONTACT) {
859                 if (!new_smc->conn.rmb_desc->reused) {
860                         if (smc_reg_rmb(link, new_smc->conn.rmb_desc)) {
861                                 reason_code = SMC_CLC_DECL_INTERR;
862                                 goto decline_rdma_unlock;
863                         }
864                 }
865         }
866         smc_rmb_sync_sg_for_device(&new_smc->conn);
867 
868         rc = smc_clc_send_accept(new_smc, local_contact);
869         if (rc)
870                 goto out_err_unlock;
871 
872         /* receive SMC Confirm CLC message */
873         reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
874                                        SMC_CLC_CONFIRM);
875         if (reason_code < 0)
876                 goto out_err_unlock;
877         if (reason_code > 0)
878                 goto decline_rdma_unlock;
879         smc_conn_save_peer_info(new_smc, &cclc);
880         if (local_contact == SMC_FIRST_CONTACT)
881                 smc_link_save_peer_info(link, &cclc);
882 
883         rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
884         if (rc) {
885                 reason_code = SMC_CLC_DECL_INTERR;
886                 goto decline_rdma_unlock;
887         }
888 
889         if (local_contact == SMC_FIRST_CONTACT) {
890                 rc = smc_ib_ready_link(link);
891                 if (rc) {
892                         reason_code = SMC_CLC_DECL_INTERR;
893                         goto decline_rdma_unlock;
894                 }
895                 /* QP confirmation over RoCE fabric */
896                 reason_code = smc_serv_conf_first_link(new_smc);
897                 if (reason_code < 0)
898                         /* peer is not aware of a problem */
899                         goto out_err_unlock;
900                 if (reason_code > 0)
901                         goto decline_rdma_unlock;
902         }
903 
904         smc_tx_init(new_smc);
905         mutex_unlock(&smc_create_lgr_pending);
906 
907 out_connected:
908         sk_refcnt_debug_inc(newsmcsk);
909         if (newsmcsk->sk_state == SMC_INIT)
910                 newsmcsk->sk_state = SMC_ACTIVE;
911 enqueue:
912         lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
913         if (lsmc->sk.sk_state == SMC_LISTEN) {
914                 smc_accept_enqueue(&lsmc->sk, newsmcsk);
915         } else { /* no longer listening */
916                 smc_close_non_accepted(newsmcsk);
917         }
918         release_sock(&lsmc->sk);
919 
920         /* Wake up accept */
921         lsmc->sk.sk_data_ready(&lsmc->sk);
922         sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
923         return;
924 
925 decline_rdma_unlock:
926         if (local_contact == SMC_FIRST_CONTACT)
927                 smc_lgr_forget(new_smc->conn.lgr);
928         mutex_unlock(&smc_create_lgr_pending);
929 decline_rdma:
930         /* RDMA setup failed, switch back to TCP */
931         smc_conn_free(&new_smc->conn);
932         new_smc->use_fallback = true;
933         if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
934                 if (smc_clc_send_decline(new_smc, reason_code) < 0)
935                         goto out_err;
936         }
937         goto out_connected;
938 
939 out_err_unlock:
940         if (local_contact == SMC_FIRST_CONTACT)
941                 smc_lgr_forget(new_smc->conn.lgr);
942         mutex_unlock(&smc_create_lgr_pending);
943 out_err:
944         if (newsmcsk->sk_state == SMC_INIT)
945                 sock_put(&new_smc->sk); /* passive closing */
946         newsmcsk->sk_state = SMC_CLOSED;
947         smc_conn_free(&new_smc->conn);
948         goto enqueue; /* queue new sock with sk_err set */
949 }
950 
951 static void smc_tcp_listen_work(struct work_struct *work)
952 {
953         struct smc_sock *lsmc = container_of(work, struct smc_sock,
954                                              tcp_listen_work);
955         struct sock *lsk = &lsmc->sk;
956         struct smc_sock *new_smc;
957         int rc = 0;
958 
959         lock_sock(lsk);
960         while (lsk->sk_state == SMC_LISTEN) {
961                 rc = smc_clcsock_accept(lsmc, &new_smc);
962                 if (rc)
963                         goto out;
964                 if (!new_smc)
965                         continue;
966 
967                 new_smc->listen_smc = lsmc;
968                 new_smc->use_fallback = false; /* assume rdma capability first */
969                 sock_hold(lsk); /* sock_put in smc_listen_work */
970                 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
971                 smc_copy_sock_settings_to_smc(new_smc);
972                 sock_hold(&new_smc->sk); /* sock_put in passive closing */
973                 if (!schedule_work(&new_smc->smc_listen_work))
974                         sock_put(&new_smc->sk);
975         }
976 
977 out:
978         release_sock(lsk);
979         sock_put(&lsmc->sk); /* sock_hold in smc_listen */
980 }
981 
982 static int smc_listen(struct socket *sock, int backlog)
983 {
984         struct sock *sk = sock->sk;
985         struct smc_sock *smc;
986         int rc;
987 
988         smc = smc_sk(sk);
989         lock_sock(sk);
990 
991         rc = -EINVAL;
992         if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
993                 goto out;
994 
995         rc = 0;
996         if (sk->sk_state == SMC_LISTEN) {
997                 sk->sk_max_ack_backlog = backlog;
998                 goto out;
999         }
1000         /* some socket options are handled in core, so we cannot apply
1001          * them to the clc socket -- copy smc socket options to clc socket
1002          */
1003         smc_copy_sock_settings_to_clc(smc);
1004         tcp_sk(smc->clcsock->sk)->syn_smc = 1;
1005 
1006         rc = kernel_listen(smc->clcsock, backlog);
1007         if (rc)
1008                 goto out;
1009         sk->sk_max_ack_backlog = backlog;
1010         sk->sk_ack_backlog = 0;
1011         sk->sk_state = SMC_LISTEN;
1012         INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
1013         sock_hold(sk); /* sock_hold in tcp_listen_worker */
1014         if (!schedule_work(&smc->tcp_listen_work))
1015                 sock_put(sk);
1016 
1017 out:
1018         release_sock(sk);
1019         return rc;
1020 }
1021 
1022 static int smc_accept(struct socket *sock, struct socket *new_sock,
1023                       int flags, bool kern)
1024 {
1025         struct sock *sk = sock->sk, *nsk;
1026         DECLARE_WAITQUEUE(wait, current);
1027         struct smc_sock *lsmc;
1028         long timeo;
1029         int rc = 0;
1030 
1031         lsmc = smc_sk(sk);
1032         sock_hold(sk); /* sock_put below */
1033         lock_sock(sk);
1034 
1035         if (lsmc->sk.sk_state != SMC_LISTEN) {
1036                 rc = -EINVAL;
1037                 goto out;
1038         }
1039 
1040         /* Wait for an incoming connection */
1041         timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1042         add_wait_queue_exclusive(sk_sleep(sk), &wait);
1043         while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
1044                 set_current_state(TASK_INTERRUPTIBLE);
1045                 if (!timeo) {
1046                         rc = -EAGAIN;
1047                         break;
1048                 }
1049                 release_sock(sk);
1050                 timeo = schedule_timeout(timeo);
1051                 /* wakeup by sk_data_ready in smc_listen_work() */
1052                 sched_annotate_sleep();
1053                 lock_sock(sk);
1054                 if (signal_pending(current)) {
1055                         rc = sock_intr_errno(timeo);
1056                         break;
1057                 }
1058         }
1059         set_current_state(TASK_RUNNING);
1060         remove_wait_queue(sk_sleep(sk), &wait);
1061 
1062         if (!rc)
1063                 rc = sock_error(nsk);
1064 
1065 out:
1066         release_sock(sk);
1067         sock_put(sk); /* sock_hold above */
1068         return rc;
1069 }
1070 
1071 static int smc_getname(struct socket *sock, struct sockaddr *addr,
1072                        int peer)
1073 {
1074         struct smc_sock *smc;
1075 
1076         if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
1077             (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
1078                 return -ENOTCONN;
1079 
1080         smc = smc_sk(sock->sk);
1081 
1082         return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
1083 }
1084 
1085 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1086 {
1087         struct sock *sk = sock->sk;
1088         struct smc_sock *smc;
1089         int rc = -EPIPE;
1090 
1091         smc = smc_sk(sk);
1092         lock_sock(sk);
1093         if ((sk->sk_state != SMC_ACTIVE) &&
1094             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1095             (sk->sk_state != SMC_INIT))
1096                 goto out;
1097         if (smc->use_fallback)
1098                 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
1099         else
1100                 rc = smc_tx_sendmsg(smc, msg, len);
1101 out:
1102         release_sock(sk);
1103         return rc;
1104 }
1105 
1106 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1107                        int flags)
1108 {
1109         struct sock *sk = sock->sk;
1110         struct smc_sock *smc;
1111         int rc = -ENOTCONN;
1112 
1113         smc = smc_sk(sk);
1114         lock_sock(sk);
1115         if ((sk->sk_state == SMC_INIT) ||
1116             (sk->sk_state == SMC_LISTEN) ||
1117             (sk->sk_state == SMC_CLOSED))
1118                 goto out;
1119 
1120         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1121                 rc = 0;
1122                 goto out;
1123         }
1124 
1125         if (smc->use_fallback)
1126                 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
1127         else
1128                 rc = smc_rx_recvmsg(smc, msg, len, flags);
1129 
1130 out:
1131         release_sock(sk);
1132         return rc;
1133 }
1134 
1135 static __poll_t smc_accept_poll(struct sock *parent)
1136 {
1137         struct smc_sock *isk = smc_sk(parent);
1138         __poll_t mask = 0;
1139 
1140         spin_lock(&isk->accept_q_lock);
1141         if (!list_empty(&isk->accept_q))
1142                 mask = EPOLLIN | EPOLLRDNORM;
1143         spin_unlock(&isk->accept_q_lock);
1144 
1145         return mask;
1146 }
1147 
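/* Editorial note on the fallback/SMC_INIT branch below: for a
 * non-blocking connect, smc_connect() returns once the TCP connect on
 * the clcsock is in flight; the SMC handshake (smc_connect_rdma()) is
 * then completed here in poll, once the clcsock reports writability.
 * This matches the "partial support for non-blocking sockets only"
 * restriction noted in the file header.
 */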
1148 static __poll_t smc_poll(struct file *file, struct socket *sock,
1149                              poll_table *wait)
1150 {
1151         struct sock *sk = sock->sk;
1152         __poll_t mask = 0;
1153         struct smc_sock *smc;
1154         int rc;
1155 
1156         if (!sk)
1157                 return EPOLLNVAL;
1158 
1159         smc = smc_sk(sock->sk);
1160         sock_hold(sk);
1161         lock_sock(sk);
1162         if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
1163                 /* delegate to CLC child sock */
1164                 release_sock(sk);
1165                 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
1166                 lock_sock(sk);
1167                 sk->sk_err = smc->clcsock->sk->sk_err;
1168                 if (sk->sk_err) {
1169                         mask |= EPOLLERR;
1170                 } else {
1171                         /* if non-blocking connect finished ... */
1172                         if (sk->sk_state == SMC_INIT &&
1173                             mask & EPOLLOUT &&
1174                             smc->clcsock->sk->sk_state != TCP_CLOSE) {
1175                                 rc = smc_connect_rdma(smc);
1176                                 if (rc < 0)
1177                                         mask |= EPOLLERR;
1178                                 /* success cases including fallback */
1179                                 mask |= EPOLLOUT | EPOLLWRNORM;
1180                         }
1181                 }
1182         } else {
1183                 if (sk->sk_state != SMC_CLOSED) {
1184                         release_sock(sk);
1185                         sock_poll_wait(file, sk_sleep(sk), wait);
1186                         lock_sock(sk);
1187                 }
1188                 if (sk->sk_err)
1189                         mask |= EPOLLERR;
1190                 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
1191                     (sk->sk_state == SMC_CLOSED))
1192                         mask |= EPOLLHUP;
1193                 if (sk->sk_state == SMC_LISTEN) {
1194                         /* woken up by sk_data_ready in smc_listen_work() */
1195                         mask = smc_accept_poll(sk);
1196                 } else {
1197                         if (atomic_read(&smc->conn.sndbuf_space) ||
1198                             sk->sk_shutdown & SEND_SHUTDOWN) {
1199                                 mask |= EPOLLOUT | EPOLLWRNORM;
1200                         } else {
1201                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1202                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1203                         }
1204                         if (atomic_read(&smc->conn.bytes_to_rcv))
1205                                 mask |= EPOLLIN | EPOLLRDNORM;
1206                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1207                                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
1208                         if (sk->sk_state == SMC_APPCLOSEWAIT1)
1209                                 mask |= EPOLLIN;
1210                 }
1211 
1212         }
1213         release_sock(sk);
1214         sock_put(sk);
1215 
1216         return mask;
1217 }
1218 
1219 static int smc_shutdown(struct socket *sock, int how)
1220 {
1221         struct sock *sk = sock->sk;
1222         struct smc_sock *smc;
1223         int rc = -EINVAL;
1224         int rc1 = 0;
1225 
1226         smc = smc_sk(sk);
1227 
1228         if ((how < SHUT_RD) || (how > SHUT_RDWR))
1229                 return rc;
1230 
1231         lock_sock(sk);
1232 
1233         rc = -ENOTCONN;
1234         if ((sk->sk_state != SMC_ACTIVE) &&
1235             (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
1236             (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
1237             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1238             (sk->sk_state != SMC_APPCLOSEWAIT2) &&
1239             (sk->sk_state != SMC_APPFINCLOSEWAIT))
1240                 goto out;
1241         if (smc->use_fallback) {
1242                 rc = kernel_sock_shutdown(smc->clcsock, how);
1243                 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
1244                 if (sk->sk_shutdown == SHUTDOWN_MASK)
1245                         sk->sk_state = SMC_CLOSED;
1246                 goto out;
1247         }
1248         switch (how) {
1249         case SHUT_RDWR:         /* shutdown in both directions */
1250                 rc = smc_close_active(smc);
1251                 break;
1252         case SHUT_WR:
1253                 rc = smc_close_shutdown_write(smc);
1254                 break;
1255         case SHUT_RD:
1256                 rc = 0;
1257                 /* nothing more to do because peer is not involved */
1258                 break;
1259         }
1260         if (smc->clcsock)
1261                 rc1 = kernel_sock_shutdown(smc->clcsock, how);
1262         /* map sock_shutdown_cmd constants to sk_shutdown value range */
1263         sk->sk_shutdown |= how + 1;
1264 
1265 out:
1266         release_sock(sk);
1267         return rc ? rc : rc1;
1268 }
1269 
1270 static int smc_setsockopt(struct socket *sock, int level, int optname,
1271                           char __user *optval, unsigned int optlen)
1272 {
1273         struct sock *sk = sock->sk;
1274         struct smc_sock *smc;
1275 
1276         smc = smc_sk(sk);
1277 
1278         /* generic setsockopts reaching us here always apply to the
1279          * CLC socket
1280          */
1281         return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
1282                                              optval, optlen);
1283 }
1284 
1285 static int smc_getsockopt(struct socket *sock, int level, int optname,
1286                           char __user *optval, int __user *optlen)
1287 {
1288         struct smc_sock *smc;
1289 
1290         smc = smc_sk(sock->sk);
1291         /* socket options apply to the CLC socket */
1292         return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1293                                              optval, optlen);
1294 }
1295 
1296 static int smc_ioctl(struct socket *sock, unsigned int cmd,
1297                      unsigned long arg)
1298 {
1299         struct smc_sock *smc;
1300 
1301         smc = smc_sk(sock->sk);
1302         if (smc->use_fallback)
1303                 return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1304         else
1305                 return sock_no_ioctl(sock, cmd, arg);
1306 }
1307 
1308 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1309                             int offset, size_t size, int flags)
1310 {
1311         struct sock *sk = sock->sk;
1312         struct smc_sock *smc;
1313         int rc = -EPIPE;
1314 
1315         smc = smc_sk(sk);
1316         lock_sock(sk);
1317         if (sk->sk_state != SMC_ACTIVE) {
1318                 release_sock(sk);
1319                 goto out;
1320         }
1321         release_sock(sk);
1322         if (smc->use_fallback)
1323                 rc = kernel_sendpage(smc->clcsock, page, offset,
1324                                      size, flags);
1325         else
1326                 rc = sock_no_sendpage(sock, page, offset, size, flags);
1327 
1328 out:
1329         return rc;
1330 }
1331 
1332 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1333                                struct pipe_inode_info *pipe, size_t len,
1334                                     unsigned int flags)
1335 {
1336         struct sock *sk = sock->sk;
1337         struct smc_sock *smc;
1338         int rc = -ENOTCONN;
1339 
1340         smc = smc_sk(sk);
1341         lock_sock(sk);
1342         if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
1343                 goto out;
1344         if (smc->use_fallback) {
1345                 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
1346                                                     pipe, len, flags);
1347         } else {
1348                 rc = -EOPNOTSUPP;
1349         }
1350 out:
1351         release_sock(sk);
1352         return rc;
1353 }
1354 
1355 /* must look like tcp */
1356 static const struct proto_ops smc_sock_ops = {
1357         .family         = PF_SMC,
1358         .owner          = THIS_MODULE,
1359         .release        = smc_release,
1360         .bind           = smc_bind,
1361         .connect        = smc_connect,
1362         .socketpair     = sock_no_socketpair,
1363         .accept         = smc_accept,
1364         .getname        = smc_getname,
1365         .poll           = smc_poll,
1366         .ioctl          = smc_ioctl,
1367         .listen         = smc_listen,
1368         .shutdown       = smc_shutdown,
1369         .setsockopt     = smc_setsockopt,
1370         .getsockopt     = smc_getsockopt,
1371         .sendmsg        = smc_sendmsg,
1372         .recvmsg        = smc_recvmsg,
1373         .mmap           = sock_no_mmap,
1374         .sendpage       = smc_sendpage,
1375         .splice_read    = smc_splice_read,
1376 };
1377 
1378 static int smc_create(struct net *net, struct socket *sock, int protocol,
1379                       int kern)
1380 {
1381         int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
1382         struct smc_sock *smc;
1383         struct sock *sk;
1384         int rc;
1385 
1386         rc = -ESOCKTNOSUPPORT;
1387         if (sock->type != SOCK_STREAM)
1388                 goto out;
1389 
1390         rc = -EPROTONOSUPPORT;
1391         if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
1392                 goto out;
1393 
1394         rc = -ENOBUFS;
1395         sock->ops = &smc_sock_ops;
1396         sk = smc_sock_alloc(net, sock, protocol);
1397         if (!sk)
1398                 goto out;
1399 
1400         /* create internal TCP socket for CLC handshake and fallback */
1401         smc = smc_sk(sk);
1402         smc->use_fallback = false; /* assume rdma capability first */
1403         rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
1404                               &smc->clcsock);
1405         if (rc) {
1406                 sk_common_release(sk);
1407                 goto out;
1408         }
1409         smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
1410         smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
1411 
1412 out:
1413         return rc;
1414 }
1415 
1416 static const struct net_proto_family smc_sock_family_ops = {
1417         .family = PF_SMC,
1418         .owner  = THIS_MODULE,
1419         .create = smc_create,
1420 };
1421 
1422 static int __init smc_init(void)
1423 {
1424         int rc;
1425 
1426         rc = smc_pnet_init();
1427         if (rc)
1428                 return rc;
1429 
1430         rc = smc_llc_init();
1431         if (rc) {
1432                 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
1433                 goto out_pnet;
1434         }
1435 
1436         rc = smc_cdc_init();
1437         if (rc) {
1438                 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
1439                 goto out_pnet;
1440         }
1441 
1442         rc = proto_register(&smc_proto, 1);
1443         if (rc) {
1444                 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
1445                 goto out_pnet;
1446         }
1447 
1448         rc = proto_register(&smc_proto6, 1);
1449         if (rc) {
1450                 pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
1451                 goto out_proto;
1452         }
1453 
1454         rc = sock_register(&smc_sock_family_ops);
1455         if (rc) {
1456                 pr_err("%s: sock_register fails with %d\n", __func__, rc);
1457                 goto out_proto6;
1458         }
1459         INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
1460         INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
1461 
1462         rc = smc_ib_register_client();
1463         if (rc) {
1464                 pr_err("%s: ib_register fails with %d\n", __func__, rc);
1465                 goto out_sock;
1466         }
1467 
1468         static_branch_enable(&tcp_have_smc);
1469         return 0;
1470 
1471 out_sock:
1472         sock_unregister(PF_SMC);
1473 out_proto6:
1474         proto_unregister(&smc_proto6);
1475 out_proto:
1476         proto_unregister(&smc_proto);
1477 out_pnet:
1478         smc_pnet_exit();
1479         return rc;
1480 }
1481 
1482 static void __exit smc_exit(void)
1483 {
1484         struct smc_link_group *lgr, *lg;
1485         LIST_HEAD(lgr_freeing_list);
1486 
1487         spin_lock_bh(&smc_lgr_list.lock);
1488         if (!list_empty(&smc_lgr_list.list))
1489                 list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
1490         spin_unlock_bh(&smc_lgr_list.lock);
1491         list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
1492                 list_del_init(&lgr->list);
1493                 cancel_delayed_work_sync(&lgr->free_work);
1494                 smc_lgr_free(lgr); /* free link group */
1495         }
1496         static_branch_disable(&tcp_have_smc);
1497         smc_ib_unregister_client();
1498         sock_unregister(PF_SMC);
1499         proto_unregister(&smc_proto6);
1500         proto_unregister(&smc_proto);
1501         smc_pnet_exit();
1502 }
1503 
1504 module_init(smc_init);
1505 module_exit(smc_exit);
1506 
1507 MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
1508 MODULE_DESCRIPTION("smc socket address family");
1509 MODULE_LICENSE("GPL");
1510 MODULE_ALIAS_NETPROTO(PF_SMC);
1511 
