~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/net/msg_zerocopy.c

Version: ~ [ linux-5.16-rc1 ] ~ [ linux-5.15.2 ] ~ [ linux-5.14.18 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.79 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.159 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.217 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.255 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.290 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.292 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* Evaluate MSG_ZEROCOPY
  2  *
  3  * Send traffic between two processes over one of the supported
  4  * protocols and modes:
  5  *
  6  * PF_INET/PF_INET6
  7  * - SOCK_STREAM
  8  * - SOCK_DGRAM
  9  * - SOCK_DGRAM with UDP_CORK
 10  * - SOCK_RAW
 11  * - SOCK_RAW with IP_HDRINCL
 12  *
 13  * PF_PACKET
 14  * - SOCK_DGRAM
 15  * - SOCK_RAW
 16  *
 17  * PF_RDS
 18  * - SOCK_SEQPACKET
 19  *
 20  * Start this program on two connected hosts, one in send mode and
 21  * the other with option '-r' to put it in receiver mode.
 22  *
 23  * If zerocopy mode ('-z') is enabled, the sender will verify that
 24  * the kernel queues completions on the error queue for all zerocopy
 25  * transfers.
 26  */
 27 
 28 #define _GNU_SOURCE
 29 
 30 #include <arpa/inet.h>
 31 #include <error.h>
 32 #include <errno.h>
 33 #include <limits.h>
 34 #include <linux/errqueue.h>
 35 #include <linux/if_packet.h>
 36 #include <linux/ipv6.h>
 37 #include <linux/socket.h>
 38 #include <linux/sockios.h>
 39 #include <net/ethernet.h>
 40 #include <net/if.h>
 41 #include <netinet/ip.h>
 42 #include <netinet/ip6.h>
 43 #include <netinet/tcp.h>
 44 #include <netinet/udp.h>
 45 #include <poll.h>
 46 #include <sched.h>
 47 #include <stdbool.h>
 48 #include <stdio.h>
 49 #include <stdint.h>
 50 #include <stdlib.h>
 51 #include <string.h>
 52 #include <sys/ioctl.h>
 53 #include <sys/socket.h>
 54 #include <sys/stat.h>
 55 #include <sys/time.h>
 56 #include <sys/types.h>
 57 #include <sys/wait.h>
 58 #include <unistd.h>
 59 #include <linux/rds.h>
 60 
 61 #ifndef SO_EE_ORIGIN_ZEROCOPY
 62 #define SO_EE_ORIGIN_ZEROCOPY           5
 63 #endif
 64 
 65 #ifndef SO_ZEROCOPY
 66 #define SO_ZEROCOPY     60
 67 #endif
 68 
 69 #ifndef SO_EE_CODE_ZEROCOPY_COPIED
 70 #define SO_EE_CODE_ZEROCOPY_COPIED      1
 71 #endif
 72 
 73 #ifndef MSG_ZEROCOPY
 74 #define MSG_ZEROCOPY    0x4000000
 75 #endif
 76 
 77 static int  cfg_cork;
 78 static bool cfg_cork_mixed;
 79 static int  cfg_cpu             = -1;           /* default: pin to last cpu */
 80 static int  cfg_family          = PF_UNSPEC;
 81 static int  cfg_ifindex         = 1;
 82 static int  cfg_payload_len;
 83 static int  cfg_port            = 8000;
 84 static bool cfg_rx;
 85 static int  cfg_runtime_ms      = 4200;
 86 static int  cfg_verbose;
 87 static int  cfg_waittime_ms     = 500;
 88 static bool cfg_zerocopy;
 89 
 90 static socklen_t cfg_alen;
 91 static struct sockaddr_storage cfg_dst_addr;
 92 static struct sockaddr_storage cfg_src_addr;
 93 
 94 static char payload[IP_MAXPACKET];
 95 static long packets, bytes, completions, expected_completions;
 96 static int  zerocopied = -1;
 97 static uint32_t next_completion;
 98 
 99 static unsigned long gettimeofday_ms(void)
100 {
101         struct timeval tv;
102 
103         gettimeofday(&tv, NULL);
104         return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
105 }
106 
/* Compute a 16-bit ones' complement checksum over num_words 16-bit words
 * beginning at start: add up the words, fold everything above bit 15 back
 * into the low 16 bits until nothing is left there, and return the bitwise
 * inverse of the result.
 */
static uint16_t get_ip_csum(const uint16_t *start, int num_words)
{
        const uint16_t *word = start;
        unsigned long acc = 0;
        int remaining;

        for (remaining = num_words; remaining > 0; remaining--)
                acc += *word++;

        while ((acc >> 16) != 0)
                acc = (acc >> 16) + (acc & 0xFFFF);

        return (uint16_t)~acc;
}
120 
121 static int do_setcpu(int cpu)
122 {
123         cpu_set_t mask;
124 
125         CPU_ZERO(&mask);
126         CPU_SET(cpu, &mask);
127         if (sched_setaffinity(0, sizeof(mask), &mask))
128                 fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
129         else if (cfg_verbose)
130                 fprintf(stderr, "cpu: %u\n", cpu);
131 
132         return 0;
133 }
134 
/* Set an int-valued socket option on fd. If setsockopt() fails, print the
 * level, option name and value together with errno and exit with status 1.
 */
static void do_setsockopt(int fd, int level, int optname, int val)
{
        int rc = setsockopt(fd, level, optname, &val, sizeof(val));

        if (rc == 0)
                return;

        error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
140 
141 static int do_poll(int fd, int events)
142 {
143         struct pollfd pfd;
144         int ret;
145 
146         pfd.events = events;
147         pfd.revents = 0;
148         pfd.fd = fd;
149 
150         ret = poll(&pfd, 1, cfg_waittime_ms);
151         if (ret == -1)
152                 error(1, errno, "poll");
153 
154         return ret && (pfd.revents & events);
155 }
156 
/* Accept one connection on the listening socket fd and then close the
 * listening socket. Returns the accepted socket. A failure of either
 * accept() or close() terminates the program.
 */
static int do_accept(int fd)
{
        const int listen_fd = fd;
        int conn_fd;

        conn_fd = accept(listen_fd, NULL, NULL);
        if (conn_fd == -1)
                error(1, errno, "accept");

        if (close(listen_fd) != 0)
                error(1, errno, "close listen sock");

        return conn_fd;
}
169 
/* Write a single SOL_RDS / RDS_CMSG_ZCOPY_COOKIE control message carrying
 * cookie into the control buffer already attached to msg.
 *
 * The caller must have pointed msg->msg_control at a buffer large enough to
 * hold CMSG_LEN(sizeof(cookie)) bytes. A NULL control pointer is fatal.
 */
static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
{
        struct cmsghdr *cm;

        /* No system call failed here, so errno holds nothing meaningful:
         * report the problem without an errno string.
         */
        if (!msg->msg_control)
                error(1, 0, "NULL cookie");
        cm = (void *)msg->msg_control;
        cm->cmsg_len = CMSG_LEN(sizeof(cookie));
        cm->cmsg_level = SOL_RDS;
        cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
        memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
}
182 
183 static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
184 {
185         int ret, len, i, flags;
186         static uint32_t cookie;
187         char ckbuf[CMSG_SPACE(sizeof(cookie))];
188 
189         len = 0;
190         for (i = 0; i < msg->msg_iovlen; i++)
191                 len += msg->msg_iov[i].iov_len;
192 
193         flags = MSG_DONTWAIT;
194         if (do_zerocopy) {
195                 flags |= MSG_ZEROCOPY;
196                 if (domain == PF_RDS) {
197                         memset(&msg->msg_control, 0, sizeof(msg->msg_control));
198                         msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
199                         msg->msg_control = (struct cmsghdr *)ckbuf;
200                         add_zcopy_cookie(msg, ++cookie);
201                 }
202         }
203 
204         ret = sendmsg(fd, msg, flags);
205         if (ret == -1 && errno == EAGAIN)
206                 return false;
207         if (ret == -1)
208                 error(1, errno, "send");
209         if (cfg_verbose && ret != len)
210                 fprintf(stderr, "send: ret=%u != %u\n", ret, len);
211 
212         if (len) {
213                 packets++;
214                 bytes += ret;
215                 if (do_zerocopy && ret)
216                         expected_completions++;
217         }
218         if (do_zerocopy && domain == PF_RDS) {
219                 msg->msg_control = NULL;
220                 msg->msg_controllen = 0;
221         }
222 
223         return true;
224 }
225 
226 static void do_sendmsg_corked(int fd, struct msghdr *msg)
227 {
228         bool do_zerocopy = cfg_zerocopy;
229         int i, payload_len, extra_len;
230 
231         /* split up the packet. for non-multiple, make first buffer longer */
232         payload_len = cfg_payload_len / cfg_cork;
233         extra_len = cfg_payload_len - (cfg_cork * payload_len);
234 
235         do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
236 
237         for (i = 0; i < cfg_cork; i++) {
238 
239                 /* in mixed-frags mode, alternate zerocopy and copy frags
240                  * start with non-zerocopy, to ensure attach later works
241                  */
242                 if (cfg_cork_mixed)
243                         do_zerocopy = (i & 1);
244 
245                 msg->msg_iov[0].iov_len = payload_len + extra_len;
246                 extra_len = 0;
247 
248                 do_sendmsg(fd, msg, do_zerocopy,
249                            (cfg_dst_addr.ss_family == AF_INET ?
250                             PF_INET : PF_INET6));
251         }
252 
253         do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
254 }
255 
256 static int setup_iph(struct iphdr *iph, uint16_t payload_len)
257 {
258         struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
259         struct sockaddr_in *saddr = (void *) &cfg_src_addr;
260 
261         memset(iph, 0, sizeof(*iph));
262 
263         iph->version    = 4;
264         iph->tos        = 0;
265         iph->ihl        = 5;
266         iph->ttl        = 2;
267         iph->saddr      = saddr->sin_addr.s_addr;
268         iph->daddr      = daddr->sin_addr.s_addr;
269         iph->protocol   = IPPROTO_EGP;
270         iph->tot_len    = htons(sizeof(*iph) + payload_len);
271         iph->check      = get_ip_csum((void *) iph, iph->ihl << 1);
272 
273         return sizeof(*iph);
274 }
275 
276 static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
277 {
278         struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
279         struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
280 
281         memset(ip6h, 0, sizeof(*ip6h));
282 
283         ip6h->version           = 6;
284         ip6h->payload_len       = htons(payload_len);
285         ip6h->nexthdr           = IPPROTO_EGP;
286         ip6h->hop_limit         = 2;
287         ip6h->saddr             = saddr->sin6_addr;
288         ip6h->daddr             = daddr->sin6_addr;
289 
290         return sizeof(*ip6h);
291 }
292 
293 
294 static void setup_sockaddr(int domain, const char *str_addr,
295                            struct sockaddr_storage *sockaddr)
296 {
297         struct sockaddr_in6 *addr6 = (void *) sockaddr;
298         struct sockaddr_in *addr4 = (void *) sockaddr;
299 
300         switch (domain) {
301         case PF_INET:
302                 memset(addr4, 0, sizeof(*addr4));
303                 addr4->sin_family = AF_INET;
304                 addr4->sin_port = htons(cfg_port);
305                 if (str_addr &&
306                     inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
307                         error(1, 0, "ipv4 parse error: %s", str_addr);
308                 break;
309         case PF_INET6:
310                 memset(addr6, 0, sizeof(*addr6));
311                 addr6->sin6_family = AF_INET6;
312                 addr6->sin6_port = htons(cfg_port);
313                 if (str_addr &&
314                     inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
315                         error(1, 0, "ipv6 parse error: %s", str_addr);
316                 break;
317         default:
318                 error(1, 0, "illegal domain");
319         }
320 }
321 
322 static int do_setup_tx(int domain, int type, int protocol)
323 {
324         int fd;
325 
326         fd = socket(domain, type, protocol);
327         if (fd == -1)
328                 error(1, errno, "socket t");
329 
330         do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
331         if (cfg_zerocopy)
332                 do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
333 
334         if (domain != PF_PACKET && domain != PF_RDS)
335                 if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
336                         error(1, errno, "connect");
337 
338         if (domain == PF_RDS) {
339                 if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
340                         error(1, errno, "bind");
341         }
342 
343         return fd;
344 }
345 
346 static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
347 {
348         int i;
349 
350         if (ck->num > RDS_MAX_ZCOOKIES)
351                 error(1, 0, "Returned %d cookies, max expected %d\n",
352                       ck->num, RDS_MAX_ZCOOKIES);
353         for (i = 0; i < ck->num; i++)
354                 if (cfg_verbose >= 2)
355                         fprintf(stderr, "%d\n", ck->cookies[i]);
356         return ck->num;
357 }
358 
359 static bool do_recvmsg_completion(int fd)
360 {
361         char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
362         struct rds_zcopy_cookies *ck;
363         struct cmsghdr *cmsg;
364         struct msghdr msg;
365         bool ret = false;
366 
367         memset(&msg, 0, sizeof(msg));
368         msg.msg_control = cmsgbuf;
369         msg.msg_controllen = sizeof(cmsgbuf);
370 
371         if (recvmsg(fd, &msg, MSG_DONTWAIT))
372                 return ret;
373 
374         if (msg.msg_flags & MSG_CTRUNC)
375                 error(1, errno, "recvmsg notification: truncated");
376 
377         for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
378                 if (cmsg->cmsg_level == SOL_RDS &&
379                     cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
380 
381                         ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
382                         completions += do_process_zerocopy_cookies(ck);
383                         ret = true;
384                         break;
385                 }
386                 error(0, 0, "ignoring cmsg at level %d type %d\n",
387                             cmsg->cmsg_level, cmsg->cmsg_type);
388         }
389         return ret;
390 }
391 
392 static bool do_recv_completion(int fd, int domain)
393 {
394         struct sock_extended_err *serr;
395         struct msghdr msg = {};
396         struct cmsghdr *cm;
397         uint32_t hi, lo, range;
398         int ret, zerocopy;
399         char control[100];
400 
401         if (domain == PF_RDS)
402                 return do_recvmsg_completion(fd);
403 
404         msg.msg_control = control;
405         msg.msg_controllen = sizeof(control);
406 
407         ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
408         if (ret == -1 && errno == EAGAIN)
409                 return false;
410         if (ret == -1)
411                 error(1, errno, "recvmsg notification");
412         if (msg.msg_flags & MSG_CTRUNC)
413                 error(1, errno, "recvmsg notification: truncated");
414 
415         cm = CMSG_FIRSTHDR(&msg);
416         if (!cm)
417                 error(1, 0, "cmsg: no cmsg");
418         if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
419               (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
420               (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
421                 error(1, 0, "serr: wrong type: %d.%d",
422                       cm->cmsg_level, cm->cmsg_type);
423 
424         serr = (void *) CMSG_DATA(cm);
425 
426         if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
427                 error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
428         if (serr->ee_errno != 0)
429                 error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
430 
431         hi = serr->ee_data;
432         lo = serr->ee_info;
433         range = hi - lo + 1;
434 
435         /* Detect notification gaps. These should not happen often, if at all.
436          * Gaps can occur due to drops, reordering and retransmissions.
437          */
438         if (lo != next_completion)
439                 fprintf(stderr, "gap: %u..%u does not append to %u\n",
440                         lo, hi, next_completion);
441         next_completion = hi + 1;
442 
443         zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
444         if (zerocopied == -1)
445                 zerocopied = zerocopy;
446         else if (zerocopied != zerocopy) {
447                 fprintf(stderr, "serr: inconsistent\n");
448                 zerocopied = zerocopy;
449         }
450 
451         if (cfg_verbose >= 2)
452                 fprintf(stderr, "completed: %u (h=%u l=%u)\n",
453                         range, hi, lo);
454 
455         completions += range;
456         return true;
457 }
458 
/* Drain the errqueue: keep reading completion notifications until
 * do_recv_completion() reports that none was read
 */
static void do_recv_completions(int fd, int domain)
{
        for (;;) {
                if (!do_recv_completion(fd, domain))
                        break;
        }
}
464 
465 /* Wait for all remaining completions on the errqueue */
466 static void do_recv_remaining_completions(int fd, int domain)
467 {
468         int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
469 
470         while (completions < expected_completions &&
471                gettimeofday_ms() < tstop) {
472                 if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
473                         do_recv_completions(fd, domain);
474         }
475 
476         if (completions < expected_completions)
477                 fprintf(stderr, "missing notifications: %lu < %lu\n",
478                         completions, expected_completions);
479 }
480 
481 static void do_tx(int domain, int type, int protocol)
482 {
483         struct iovec iov[3] = { {0} };
484         struct sockaddr_ll laddr;
485         struct msghdr msg = {0};
486         struct ethhdr eth;
487         union {
488                 struct ipv6hdr ip6h;
489                 struct iphdr iph;
490         } nh;
491         uint64_t tstop;
492         int fd;
493 
494         fd = do_setup_tx(domain, type, protocol);
495 
496         if (domain == PF_PACKET) {
497                 uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
498 
499                 /* sock_raw passes ll header as data */
500                 if (type == SOCK_RAW) {
501                         memset(eth.h_dest, 0x06, ETH_ALEN);
502                         memset(eth.h_source, 0x02, ETH_ALEN);
503                         eth.h_proto = htons(proto);
504                         iov[0].iov_base = &eth;
505                         iov[0].iov_len = sizeof(eth);
506                         msg.msg_iovlen++;
507                 }
508 
509                 /* both sock_raw and sock_dgram expect name */
510                 memset(&laddr, 0, sizeof(laddr));
511                 laddr.sll_family        = AF_PACKET;
512                 laddr.sll_ifindex       = cfg_ifindex;
513                 laddr.sll_protocol      = htons(proto);
514                 laddr.sll_halen         = ETH_ALEN;
515 
516                 memset(laddr.sll_addr, 0x06, ETH_ALEN);
517 
518                 msg.msg_name            = &laddr;
519                 msg.msg_namelen         = sizeof(laddr);
520         }
521 
522         /* packet and raw sockets with hdrincl must pass network header */
523         if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
524                 if (cfg_family == PF_INET)
525                         iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
526                 else
527                         iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
528 
529                 iov[1].iov_base = (void *) &nh;
530                 msg.msg_iovlen++;
531         }
532 
533         if (domain == PF_RDS) {
534                 msg.msg_name = &cfg_dst_addr;
535                 msg.msg_namelen =  (cfg_dst_addr.ss_family == AF_INET ?
536                                     sizeof(struct sockaddr_in) :
537                                     sizeof(struct sockaddr_in6));
538         }
539 
540         iov[2].iov_base = payload;
541         iov[2].iov_len = cfg_payload_len;
542         msg.msg_iovlen++;
543         msg.msg_iov = &iov[3 - msg.msg_iovlen];
544 
545         tstop = gettimeofday_ms() + cfg_runtime_ms;
546         do {
547                 if (cfg_cork)
548                         do_sendmsg_corked(fd, &msg);
549                 else
550                         do_sendmsg(fd, &msg, cfg_zerocopy, domain);
551 
552                 while (!do_poll(fd, POLLOUT)) {
553                         if (cfg_zerocopy)
554                                 do_recv_completions(fd, domain);
555                 }
556 
557         } while (gettimeofday_ms() < tstop);
558 
559         if (cfg_zerocopy)
560                 do_recv_remaining_completions(fd, domain);
561 
562         if (close(fd))
563                 error(1, errno, "close");
564 
565         fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
566                 packets, bytes >> 20, completions,
567                 zerocopied == 1 ? 'y' : 'n');
568 }
569 
570 static int do_setup_rx(int domain, int type, int protocol)
571 {
572         int fd;
573 
574         /* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
575          * to recv the only copy of the packet, not a clone
576          */
577         if (domain == PF_PACKET)
578                 error(1, 0, "Use PF_INET/SOCK_RAW to read");
579 
580         if (type == SOCK_RAW && protocol == IPPROTO_RAW)
581                 error(1, 0, "IPPROTO_RAW: not supported on Rx");
582 
583         fd = socket(domain, type, protocol);
584         if (fd == -1)
585                 error(1, errno, "socket r");
586 
587         do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
588         do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
589         do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
590 
591         if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
592                 error(1, errno, "bind");
593 
594         if (type == SOCK_STREAM) {
595                 if (listen(fd, 1))
596                         error(1, errno, "listen");
597                 fd = do_accept(fd);
598         }
599 
600         return fd;
601 }
602 
603 /* Flush all outstanding bytes for the tcp receive queue */
604 static void do_flush_tcp(int fd)
605 {
606         int ret;
607 
608         /* MSG_TRUNC flushes up to len bytes */
609         ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
610         if (ret == -1 && errno == EAGAIN)
611                 return;
612         if (ret == -1)
613                 error(1, errno, "flush");
614         if (!ret)
615                 return;
616 
617         packets++;
618         bytes += ret;
619 }
620 
621 /* Flush all outstanding datagrams. Verify first few bytes of each. */
622 static void do_flush_datagram(int fd, int type)
623 {
624         int ret, off = 0;
625         char buf[64];
626 
627         /* MSG_TRUNC will return full datagram length */
628         ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
629         if (ret == -1 && errno == EAGAIN)
630                 return;
631 
632         /* raw ipv4 return with header, raw ipv6 without */
633         if (cfg_family == PF_INET && type == SOCK_RAW) {
634                 off += sizeof(struct iphdr);
635                 ret -= sizeof(struct iphdr);
636         }
637 
638         if (ret == -1)
639                 error(1, errno, "recv");
640         if (ret != cfg_payload_len)
641                 error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
642         if (ret > sizeof(buf) - off)
643                 ret = sizeof(buf) - off;
644         if (memcmp(buf + off, payload, ret))
645                 error(1, 0, "recv: data mismatch");
646 
647         packets++;
648         bytes += cfg_payload_len;
649 }
650 
651 static void do_rx(int domain, int type, int protocol)
652 {
653         const int cfg_receiver_wait_ms = 400;
654         uint64_t tstop;
655         int fd;
656 
657         fd = do_setup_rx(domain, type, protocol);
658 
659         tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
660         do {
661                 if (type == SOCK_STREAM)
662                         do_flush_tcp(fd);
663                 else
664                         do_flush_datagram(fd, type);
665 
666                 do_poll(fd, POLLIN);
667 
668         } while (gettimeofday_ms() < tstop);
669 
670         if (close(fd))
671                 error(1, errno, "close");
672 
673         fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
674 }
675 
676 static void do_test(int domain, int type, int protocol)
677 {
678         int i;
679 
680         if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
681                 error(1, 0, "can only cork udp sockets");
682 
683         do_setcpu(cfg_cpu);
684 
685         for (i = 0; i < IP_MAXPACKET; i++)
686                 payload[i] = 'a' + (i % 26);
687 
688         if (cfg_rx)
689                 do_rx(domain, type, protocol);
690         else
691                 do_tx(domain, type, protocol);
692 }
693 
694 static void usage(const char *filepath)
695 {
696         error(1, 0, "Usage: %s [options] <test>", filepath);
697 }
698 
699 static void parse_opts(int argc, char **argv)
700 {
701         const int max_payload_len = sizeof(payload) -
702                                     sizeof(struct ipv6hdr) -
703                                     sizeof(struct tcphdr) -
704                                     40 /* max tcp options */;
705         int c;
706         char *daddr = NULL, *saddr = NULL;
707         char *cfg_test;
708 
709         cfg_payload_len = max_payload_len;
710 
711         while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
712                 switch (c) {
713                 case '4':
714                         if (cfg_family != PF_UNSPEC)
715                                 error(1, 0, "Pass one of -4 or -6");
716                         cfg_family = PF_INET;
717                         cfg_alen = sizeof(struct sockaddr_in);
718                         break;
719                 case '6':
720                         if (cfg_family != PF_UNSPEC)
721                                 error(1, 0, "Pass one of -4 or -6");
722                         cfg_family = PF_INET6;
723                         cfg_alen = sizeof(struct sockaddr_in6);
724                         break;
725                 case 'c':
726                         cfg_cork = strtol(optarg, NULL, 0);
727                         break;
728                 case 'C':
729                         cfg_cpu = strtol(optarg, NULL, 0);
730                         break;
731                 case 'D':
732                         daddr = optarg;
733                         break;
734                 case 'i':
735                         cfg_ifindex = if_nametoindex(optarg);
736                         if (cfg_ifindex == 0)
737                                 error(1, errno, "invalid iface: %s", optarg);
738                         break;
739                 case 'm':
740                         cfg_cork_mixed = true;
741                         break;
742                 case 'p':
743                         cfg_port = strtoul(optarg, NULL, 0);
744                         break;
745                 case 'r':
746                         cfg_rx = true;
747                         break;
748                 case 's':
749                         cfg_payload_len = strtoul(optarg, NULL, 0);
750                         break;
751                 case 'S':
752                         saddr = optarg;
753                         break;
754                 case 't':
755                         cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
756                         break;
757                 case 'v':
758                         cfg_verbose++;
759                         break;
760                 case 'z':
761                         cfg_zerocopy = true;
762                         break;
763                 }
764         }
765 
766         cfg_test = argv[argc - 1];
767         if (strcmp(cfg_test, "rds") == 0) {
768                 if (!daddr)
769                         error(1, 0, "-D <server addr> required for PF_RDS\n");
770                 if (!cfg_rx && !saddr)
771                         error(1, 0, "-S <client addr> required for PF_RDS\n");
772         }
773         setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
774         setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
775 
776         if (cfg_payload_len > max_payload_len)
777                 error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
778         if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
779                 error(1, 0, "-m: cork_mixed requires corking and zerocopy");
780 
781         if (optind != argc - 1)
782                 usage(argv[0]);
783 }
784 
785 int main(int argc, char **argv)
786 {
787         const char *cfg_test;
788 
789         parse_opts(argc, argv);
790 
791         cfg_test = argv[argc - 1];
792 
793         if (!strcmp(cfg_test, "packet"))
794                 do_test(PF_PACKET, SOCK_RAW, 0);
795         else if (!strcmp(cfg_test, "packet_dgram"))
796                 do_test(PF_PACKET, SOCK_DGRAM, 0);
797         else if (!strcmp(cfg_test, "raw"))
798                 do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
799         else if (!strcmp(cfg_test, "raw_hdrincl"))
800                 do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
801         else if (!strcmp(cfg_test, "tcp"))
802                 do_test(cfg_family, SOCK_STREAM, 0);
803         else if (!strcmp(cfg_test, "udp"))
804                 do_test(cfg_family, SOCK_DGRAM, 0);
805         else if (!strcmp(cfg_test, "rds"))
806                 do_test(PF_RDS, SOCK_SEQPACKET, 0);
807         else
808                 error(1, 0, "unknown cfg_test %s", cfg_test);
809 
810         return 0;
811 }
812 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp