TOMOYO Linux Cross Reference
Linux/samples/bpf/xdpsock_user.c

// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2018 Intel Corporation. */

#include <assert.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
#include <net/if.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <net/ethernet.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <locale.h>
#include <sys/types.h>
#include <poll.h>

#include "bpf/libbpf.h"
#include "bpf_util.h"
#include <bpf/bpf.h>

#include "xdpsock.h"

#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

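/*
 * UMEM geometry used by this sample: NUM_FRAMES fixed-size chunks of
 * FRAME_SIZE bytes (1 << FRAME_SHIFT) with no extra headroom.
 * Descriptor addresses are byte offsets into that area, so a frame
 * index becomes an address by shifting it left by FRAME_SHIFT.
 */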
#define NUM_FRAMES 131072
#define FRAME_HEADROOM 0
#define FRAME_SHIFT 11
#define FRAME_SIZE 2048
#define NUM_DESCS 1024
#define BATCH_SIZE 16

#define FQ_NUM_DESCS 1024
#define CQ_NUM_DESCS 1024

#define DEBUG_HEXDUMP 0

typedef __u64 u64;
typedef __u32 u32;

static unsigned long prev_time;

enum benchmark_type {
        BENCH_RXDROP = 0,
        BENCH_TXONLY = 1,
        BENCH_L2FWD = 2,
};

static enum benchmark_type opt_bench = BENCH_RXDROP;
static u32 opt_xdp_flags;
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
static int opt_poll;
static int opt_shared_packet_buffer;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags;

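/*
 * Per-ring bookkeeping kept in user space.  The producer and consumer
 * indices live in the mmapped ring shared with the kernel;
 * cached_prod/cached_cons are local snapshots so the fast path only
 * touches the shared indices when the cached view runs out of entries.
 */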
struct xdp_umem_uqueue {
        u32 cached_prod;
        u32 cached_cons;
        u32 mask;
        u32 size;
        u32 *producer;
        u32 *consumer;
        u64 *ring;
        void *map;
};

struct xdp_umem {
        char *frames;
        struct xdp_umem_uqueue fq;
        struct xdp_umem_uqueue cq;
        int fd;
};

struct xdp_uqueue {
        u32 cached_prod;
        u32 cached_cons;
        u32 mask;
        u32 size;
        u32 *producer;
        u32 *consumer;
        struct xdp_desc *ring;
        void *map;
};

struct xdpsock {
        struct xdp_uqueue rx;
        struct xdp_uqueue tx;
        int sfd;
        struct xdp_umem *umem;
        u32 outstanding_tx;
        unsigned long rx_npkts;
        unsigned long tx_npkts;
        unsigned long prev_rx_npkts;
        unsigned long prev_tx_npkts;
};

static int num_socks;
struct xdpsock *xsks[MAX_SOCKS];

static unsigned long get_nsecs(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000UL + ts.tv_nsec;
}

static void dump_stats(void);

#define lassert(expr)                                                   \
        do {                                                            \
                if (!(expr)) {                                          \
                        fprintf(stderr, "%s:%s:%i: Assertion failed: "  \
                                #expr ": errno: %d/\"%s\"\n",           \
                                __FILE__, __func__, __LINE__,           \
                                errno, strerror(errno));                \
                        dump_stats();                                   \
                        exit(EXIT_FAILURE);                             \
                }                                                       \
        } while (0)

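/*
 * The rings are single-producer/single-consumer and shared with the
 * kernel: descriptors must be visible before a producer index is
 * published, and must not be read before the matching producer index
 * has been observed.  On aarch64 this uses DMB barriers; elsewhere the
 * sample falls back to a plain compiler barrier.
 */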
#define barrier() __asm__ __volatile__("": : :"memory")
#ifdef __aarch64__
#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory")
#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory")
#else
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
#endif
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

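/*
 * Canned 60-byte frame used by the txonly benchmark: an Ethernet
 * header (type IPv4) followed by an IPv4/UDP packet with a small
 * dummy payload.
 */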
static const char pkt_data[] =
        "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
        "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
        "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
        "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";

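/*
 * Ring-space helpers: "free" is computed against the consumer index of
 * a ring we produce into, "avail" against the producer index of a ring
 * we consume from.  The shared index is re-read only when the cached
 * view cannot satisfy the request, keeping cross-core traffic low.
 */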
static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
{
        u32 free_entries = q->cached_cons - q->cached_prod;

        if (free_entries >= nb)
                return free_entries;

        /* Refresh the local tail pointer */
        q->cached_cons = *q->consumer + q->size;

        return q->cached_cons - q->cached_prod;
}

static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
{
        u32 free_entries = q->cached_cons - q->cached_prod;

        if (free_entries >= ndescs)
                return free_entries;

        /* Refresh the local tail pointer */
        q->cached_cons = *q->consumer + q->size;
        return q->cached_cons - q->cached_prod;
}

static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
{
        u32 entries = q->cached_prod - q->cached_cons;

        if (entries == 0) {
                q->cached_prod = *q->producer;
                entries = q->cached_prod - q->cached_cons;
        }

        return (entries > nb) ? nb : entries;
}

static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
{
        u32 entries = q->cached_prod - q->cached_cons;

        if (entries == 0) {
                q->cached_prod = *q->producer;
                entries = q->cached_prod - q->cached_cons;
        }

        return (entries > ndescs) ? ndescs : entries;
}

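/*
 * Producer-side pattern used by the fill/TX enqueue helpers: reserve
 * space, write the descriptors at the cached producer index, then
 * publish the new producer index after a write barrier so the kernel
 * never sees the index ahead of the descriptor contents.
 */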
static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
                                         struct xdp_desc *d,
                                         size_t nb)
{
        u32 i;

        if (umem_nb_free(fq, nb) < nb)
                return -ENOSPC;

        for (i = 0; i < nb; i++) {
                u32 idx = fq->cached_prod++ & fq->mask;

                fq->ring[idx] = d[i].addr;
        }

        u_smp_wmb();

        *fq->producer = fq->cached_prod;

        return 0;
}

static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u64 *d,
                                      size_t nb)
{
        u32 i;

        if (umem_nb_free(fq, nb) < nb)
                return -ENOSPC;

        for (i = 0; i < nb; i++) {
                u32 idx = fq->cached_prod++ & fq->mask;

                fq->ring[idx] = d[i];
        }

        u_smp_wmb();

        *fq->producer = fq->cached_prod;

        return 0;
}

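/*
 * Consumer-side pattern used by umem_complete_from_kernel() and
 * xq_deq(): find out how many entries the kernel has produced, copy
 * them out after a read barrier, then publish the new consumer index
 * so the kernel can reuse those slots.
 */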
static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
                                               u64 *d, size_t nb)
{
        u32 idx, i, entries = umem_nb_avail(cq, nb);

        u_smp_rmb();

        for (i = 0; i < entries; i++) {
                idx = cq->cached_cons++ & cq->mask;
                d[i] = cq->ring[idx];
        }

        if (entries > 0) {
                u_smp_wmb();

                *cq->consumer = cq->cached_cons;
        }

        return entries;
}

static inline void *xq_get_data(struct xdpsock *xsk, u64 addr)
{
        return &xsk->umem->frames[addr];
}

static inline int xq_enq(struct xdp_uqueue *uq,
                         const struct xdp_desc *descs,
                         unsigned int ndescs)
{
        struct xdp_desc *r = uq->ring;
        unsigned int i;

        if (xq_nb_free(uq, ndescs) < ndescs)
                return -ENOSPC;

        for (i = 0; i < ndescs; i++) {
                u32 idx = uq->cached_prod++ & uq->mask;

                r[idx].addr = descs[i].addr;
                r[idx].len = descs[i].len;
        }

        u_smp_wmb();

        *uq->producer = uq->cached_prod;
        return 0;
}

static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
                                 unsigned int id, unsigned int ndescs)
{
        struct xdp_desc *r = uq->ring;
        unsigned int i;

        if (xq_nb_free(uq, ndescs) < ndescs)
                return -ENOSPC;

        for (i = 0; i < ndescs; i++) {
                u32 idx = uq->cached_prod++ & uq->mask;

                r[idx].addr     = (id + i) << FRAME_SHIFT;
                r[idx].len      = sizeof(pkt_data) - 1;
        }

        u_smp_wmb();

        *uq->producer = uq->cached_prod;
        return 0;
}

static inline int xq_deq(struct xdp_uqueue *uq,
                         struct xdp_desc *descs,
                         int ndescs)
{
        struct xdp_desc *r = uq->ring;
        unsigned int idx;
        int i, entries;

        entries = xq_nb_avail(uq, ndescs);

        u_smp_rmb();

        for (i = 0; i < entries; i++) {
                idx = uq->cached_cons++ & uq->mask;
                descs[i] = r[idx];
        }

        if (entries > 0) {
                u_smp_wmb();

                *uq->consumer = uq->cached_cons;
        }

        return entries;
}

static void swap_mac_addresses(void *data)
{
        struct ether_header *eth = (struct ether_header *)data;
        struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
        struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
        struct ether_addr tmp;

        tmp = *src_addr;
        *src_addr = *dst_addr;
        *dst_addr = tmp;
}

static void hex_dump(void *pkt, size_t length, u64 addr)
{
        const unsigned char *address = (unsigned char *)pkt;
        const unsigned char *line = address;
        size_t line_size = 32;
        unsigned char c;
        char buf[32];
        int i = 0;

        if (!DEBUG_HEXDUMP)
                return;

        sprintf(buf, "addr=%llu", addr);
        printf("length = %zu\n", length);
        printf("%s | ", buf);
        while (length-- > 0) {
                printf("%02X ", *address++);
                if (!(++i % line_size) || (length == 0 && i % line_size)) {
                        if (length == 0) {
                                while (i++ % line_size)
                                        printf("__ ");
                        }
                        printf(" | ");  /* right close */
                        while (line < address) {
                                c = *line++;
                                printf("%c", (c < 33 || c == 255) ? 0x2E : c);
                        }
                        printf("\n");
                        if (length > 0)
                                printf("%s | ", buf);
                }
        }
        printf("\n");
}

static size_t gen_eth_frame(char *frame)
{
        memcpy(frame, pkt_data, sizeof(pkt_data) - 1);
        return sizeof(pkt_data) - 1;
}

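/*
 * Register the packet buffer with the kernel and map the UMEM rings:
 * allocate a page-aligned area, register it with XDP_UMEM_REG, size
 * the fill and completion rings, then mmap both using the offsets
 * returned by XDP_MMAP_OFFSETS.  The fill queue's cached consumer
 * index starts at FQ_NUM_DESCS so the whole ring counts as free.
 */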
static struct xdp_umem *xdp_umem_configure(int sfd)
{
        int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
        struct xdp_mmap_offsets off;
        struct xdp_umem_reg mr;
        struct xdp_umem *umem;
        socklen_t optlen;
        void *bufs;

        umem = calloc(1, sizeof(*umem));
        lassert(umem);

        lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
                               NUM_FRAMES * FRAME_SIZE) == 0);

        mr.addr = (__u64)bufs;
        mr.len = NUM_FRAMES * FRAME_SIZE;
        mr.chunk_size = FRAME_SIZE;
        mr.headroom = FRAME_HEADROOM;

        lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
        lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
                           sizeof(int)) == 0);
        lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
                           sizeof(int)) == 0);

        optlen = sizeof(off);
        lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
                           &optlen) == 0);

        umem->fq.map = mmap(0, off.fr.desc +
                            FQ_NUM_DESCS * sizeof(u64),
                            PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_POPULATE, sfd,
                            XDP_UMEM_PGOFF_FILL_RING);
        lassert(umem->fq.map != MAP_FAILED);

        umem->fq.mask = FQ_NUM_DESCS - 1;
        umem->fq.size = FQ_NUM_DESCS;
        umem->fq.producer = umem->fq.map + off.fr.producer;
        umem->fq.consumer = umem->fq.map + off.fr.consumer;
        umem->fq.ring = umem->fq.map + off.fr.desc;
        umem->fq.cached_cons = FQ_NUM_DESCS;

        umem->cq.map = mmap(0, off.cr.desc +
                             CQ_NUM_DESCS * sizeof(u64),
                             PROT_READ | PROT_WRITE,
                             MAP_SHARED | MAP_POPULATE, sfd,
                             XDP_UMEM_PGOFF_COMPLETION_RING);
        lassert(umem->cq.map != MAP_FAILED);

        umem->cq.mask = CQ_NUM_DESCS - 1;
        umem->cq.size = CQ_NUM_DESCS;
        umem->cq.producer = umem->cq.map + off.cr.producer;
        umem->cq.consumer = umem->cq.map + off.cr.consumer;
        umem->cq.ring = umem->cq.map + off.cr.desc;

        umem->frames = bufs;
        umem->fd = sfd;

        if (opt_bench == BENCH_TXONLY) {
                int i;

                for (i = 0; i < NUM_FRAMES * FRAME_SIZE; i += FRAME_SIZE)
                        (void)gen_eth_frame(&umem->frames[i]);
        }

        return umem;
}

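/*
 * Create and bind one AF_XDP socket.  The first socket builds its own
 * UMEM; subsequent sockets pass that UMEM in and bind with
 * XDP_SHARED_UMEM.  Only the non-shared case pre-populates the fill
 * ring, since the fill queue belongs to the UMEM rather than the
 * socket.
 */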
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
        struct sockaddr_xdp sxdp = {};
        struct xdp_mmap_offsets off;
        int sfd, ndescs = NUM_DESCS;
        struct xdpsock *xsk;
        bool shared = true;
        socklen_t optlen;
        u64 i;

        sfd = socket(PF_XDP, SOCK_RAW, 0);
        lassert(sfd >= 0);

        xsk = calloc(1, sizeof(*xsk));
        lassert(xsk);

        xsk->sfd = sfd;
        xsk->outstanding_tx = 0;

        if (!umem) {
                shared = false;
                xsk->umem = xdp_umem_configure(sfd);
        } else {
                xsk->umem = umem;
        }

        lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING,
                           &ndescs, sizeof(int)) == 0);
        lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
                           &ndescs, sizeof(int)) == 0);
        optlen = sizeof(off);
        lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
                           &optlen) == 0);

        /* Rx */
        xsk->rx.map = mmap(NULL,
                           off.rx.desc +
                           NUM_DESCS * sizeof(struct xdp_desc),
                           PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_POPULATE, sfd,
                           XDP_PGOFF_RX_RING);
        lassert(xsk->rx.map != MAP_FAILED);

        if (!shared) {
                for (i = 0; i < NUM_DESCS * FRAME_SIZE; i += FRAME_SIZE)
                        lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1)
                                == 0);
        }

        /* Tx */
        xsk->tx.map = mmap(NULL,
                           off.tx.desc +
                           NUM_DESCS * sizeof(struct xdp_desc),
                           PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_POPULATE, sfd,
                           XDP_PGOFF_TX_RING);
        lassert(xsk->tx.map != MAP_FAILED);

        xsk->rx.mask = NUM_DESCS - 1;
        xsk->rx.size = NUM_DESCS;
        xsk->rx.producer = xsk->rx.map + off.rx.producer;
        xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
        xsk->rx.ring = xsk->rx.map + off.rx.desc;

        xsk->tx.mask = NUM_DESCS - 1;
        xsk->tx.size = NUM_DESCS;
        xsk->tx.producer = xsk->tx.map + off.tx.producer;
        xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
        xsk->tx.ring = xsk->tx.map + off.tx.desc;
        xsk->tx.cached_cons = NUM_DESCS;

        sxdp.sxdp_family = PF_XDP;
        sxdp.sxdp_ifindex = opt_ifindex;
        sxdp.sxdp_queue_id = opt_queue;

        if (shared) {
                sxdp.sxdp_flags = XDP_SHARED_UMEM;
                sxdp.sxdp_shared_umem_fd = umem->fd;
        } else {
                sxdp.sxdp_flags = opt_xdp_bind_flags;
        }

        lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0);

        return xsk;
}

static void print_benchmark(bool running)
{
        const char *bench_str = "INVALID";

        if (opt_bench == BENCH_RXDROP)
                bench_str = "rxdrop";
        else if (opt_bench == BENCH_TXONLY)
                bench_str = "txonly";
        else if (opt_bench == BENCH_L2FWD)
                bench_str = "l2fwd";

        printf("%s:%d %s ", opt_if, opt_queue, bench_str);
        if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
                printf("xdp-skb ");
        else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
                printf("xdp-drv ");
        else
                printf("        ");

        if (opt_poll)
                printf("poll() ");

        if (running) {
                printf("running...");
                fflush(stdout);
        }
}

static void dump_stats(void)
{
        unsigned long now = get_nsecs();
        long dt = now - prev_time;
        int i;

        prev_time = now;

        for (i = 0; i < num_socks && xsks[i]; i++) {
                char *fmt = "%-15s %'-11.0f %'-11lu\n";
                double rx_pps, tx_pps;

                rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
                         1000000000. / dt;
                tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
                         1000000000. / dt;

                printf("\n sock%d@", i);
                print_benchmark(false);
                printf("\n");

                printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
                       dt / 1000000000.);
                printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
                printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);

                xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
                xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
        }
}

static void *poller(void *arg)
{
        (void)arg;
        for (;;) {
                sleep(opt_interval);
                dump_stats();
        }

        return NULL;
}

static void int_exit(int sig)
{
        (void)sig;
        dump_stats();
        bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
        exit(EXIT_SUCCESS);
}

static struct option long_options[] = {
        {"rxdrop", no_argument, 0, 'r'},
        {"txonly", no_argument, 0, 't'},
        {"l2fwd", no_argument, 0, 'l'},
        {"interface", required_argument, 0, 'i'},
        {"queue", required_argument, 0, 'q'},
        {"poll", no_argument, 0, 'p'},
        {"shared-buffer", no_argument, 0, 's'},
        {"xdp-skb", no_argument, 0, 'S'},
        {"xdp-native", no_argument, 0, 'N'},
        {"interval", required_argument, 0, 'n'},
        {"zero-copy", no_argument, 0, 'z'},
        {"copy", no_argument, 0, 'c'},
        {0, 0, 0, 0}
};

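/*
 * Illustrative invocations (assuming the binary is built as "xdpsock"):
 *
 *   ./xdpsock -i eth0 -q 0 -r -N   rxdrop on eth0 queue 0, native XDP
 *   ./xdpsock -i eth0 -t -S -p     txonly in skb/copy mode, using poll()
 */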
static void usage(const char *prog)
{
        const char *str =
                "  Usage: %s [OPTIONS]\n"
                "  Options:\n"
                "  -r, --rxdrop         Discard all incoming packets (default)\n"
                "  -t, --txonly         Only send packets\n"
                "  -l, --l2fwd          MAC swap L2 forwarding\n"
                "  -i, --interface=n    Run on interface n\n"
                "  -q, --queue=n        Use queue n (default 0)\n"
                "  -p, --poll           Use poll syscall\n"
                "  -s, --shared-buffer  Use shared packet buffer\n"
                "  -S, --xdp-skb        Use XDP skb mode\n"
                "  -N, --xdp-native     Enforce XDP native mode\n"
                "  -n, --interval=n     Specify statistics update interval (default 1 sec).\n"
                "  -z, --zero-copy      Force zero-copy mode.\n"
                "  -c, --copy           Force copy mode.\n"
                "\n";
        fprintf(stderr, str, prog);
        exit(EXIT_FAILURE);
}

static void parse_command_line(int argc, char **argv)
{
        int option_index, c;

        opterr = 0;

        for (;;) {
                c = getopt_long(argc, argv, "rtli:q:psSNn:cz", long_options,
                                &option_index);
                if (c == -1)
                        break;

                switch (c) {
                case 'r':
                        opt_bench = BENCH_RXDROP;
                        break;
                case 't':
                        opt_bench = BENCH_TXONLY;
                        break;
                case 'l':
                        opt_bench = BENCH_L2FWD;
                        break;
                case 'i':
                        opt_if = optarg;
                        break;
                case 'q':
                        opt_queue = atoi(optarg);
                        break;
                case 's':
                        opt_shared_packet_buffer = 1;
                        break;
                case 'p':
                        opt_poll = 1;
                        break;
                case 'S':
                        opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
                        opt_xdp_bind_flags |= XDP_COPY;
                        break;
                case 'N':
                        opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
                        break;
                case 'n':
                        opt_interval = atoi(optarg);
                        break;
                case 'z':
                        opt_xdp_bind_flags |= XDP_ZEROCOPY;
                        break;
                case 'c':
                        opt_xdp_bind_flags |= XDP_COPY;
                        break;
                default:
                        usage(basename(argv[0]));
                }
        }

        opt_ifindex = if_nametoindex(opt_if);
        if (!opt_ifindex) {
                fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
                        opt_if);
                usage(basename(argv[0]));
        }
}

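/*
 * Tell the kernel there is work on the TX ring.  A zero-length
 * sendto() with MSG_DONTWAIT is enough to trigger transmission;
 * ENOBUFS, EAGAIN and EBUSY just mean the kernel is busy and the
 * caller will retry on the next pass.
 */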
static void kick_tx(int fd)
{
        int ret;

        ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
        if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
                return;
        lassert(0);
}

static inline void complete_tx_l2fwd(struct xdpsock *xsk)
{
        u64 descs[BATCH_SIZE];
        unsigned int rcvd;
        size_t ndescs;

        if (!xsk->outstanding_tx)
                return;

        kick_tx(xsk->sfd);
        ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
                 xsk->outstanding_tx;

        /* re-add completed Tx buffers */
        rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs);
        if (rcvd > 0) {
                umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd);
                xsk->outstanding_tx -= rcvd;
                xsk->tx_npkts += rcvd;
        }
}

static inline void complete_tx_only(struct xdpsock *xsk)
{
        u64 descs[BATCH_SIZE];
        unsigned int rcvd;

        if (!xsk->outstanding_tx)
                return;

        kick_tx(xsk->sfd);

        rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE);
        if (rcvd > 0) {
                xsk->outstanding_tx -= rcvd;
                xsk->tx_npkts += rcvd;
        }
}

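/*
 * rxdrop path: pull a batch of RX descriptors, optionally hex dump
 * them, and hand the frame addresses straight back to the fill queue
 * so the kernel can reuse them.
 */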
static void rx_drop(struct xdpsock *xsk)
{
        struct xdp_desc descs[BATCH_SIZE];
        unsigned int rcvd, i;

        rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
        if (!rcvd)
                return;

        for (i = 0; i < rcvd; i++) {
                char *pkt = xq_get_data(xsk, descs[i].addr);

                hex_dump(pkt, descs[i].len, descs[i].addr);
        }

        xsk->rx_npkts += rcvd;

        umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd);
}

static void rx_drop_all(void)
{
        struct pollfd fds[MAX_SOCKS + 1];
        int i, ret, timeout, nfds = 1;

        memset(fds, 0, sizeof(fds));

        for (i = 0; i < num_socks; i++) {
                fds[i].fd = xsks[i]->sfd;
                fds[i].events = POLLIN;
                timeout = 1000; /* 1 second */
        }

        for (;;) {
                if (opt_poll) {
                        ret = poll(fds, nfds, timeout);
                        if (ret <= 0)
                                continue;
                }

                for (i = 0; i < num_socks; i++)
                        rx_drop(xsks[i]);
        }
}

static void tx_only(struct xdpsock *xsk)
{
        int timeout, ret, nfds = 1;
        struct pollfd fds[nfds + 1];
        unsigned int idx = 0;

        memset(fds, 0, sizeof(fds));
        fds[0].fd = xsk->sfd;
        fds[0].events = POLLOUT;
        timeout = 1000; /* 1 second */

        for (;;) {
                if (opt_poll) {
                        ret = poll(fds, nfds, timeout);
                        if (ret <= 0)
                                continue;

                        if (fds[0].fd != xsk->sfd ||
                            !(fds[0].revents & POLLOUT))
                                continue;
                }

                if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) {
                        lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0);

                        xsk->outstanding_tx += BATCH_SIZE;
                        idx += BATCH_SIZE;
                        idx %= NUM_FRAMES;
                }

                complete_tx_only(xsk);
        }
}

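/*
 * l2fwd path: spin until at least one RX descriptor is available
 * (recycling TX completions while waiting), swap the MAC addresses in
 * place, and re-enqueue the same frames on the TX ring.
 */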
static void l2fwd(struct xdpsock *xsk)
{
        for (;;) {
                struct xdp_desc descs[BATCH_SIZE];
                unsigned int rcvd, i;
                int ret;

                for (;;) {
                        complete_tx_l2fwd(xsk);

                        rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
                        if (rcvd > 0)
                                break;
                }

                for (i = 0; i < rcvd; i++) {
                        char *pkt = xq_get_data(xsk, descs[i].addr);

                        swap_mac_addresses(pkt);

                        hex_dump(pkt, descs[i].len, descs[i].addr);
                }

                xsk->rx_npkts += rcvd;

                ret = xq_enq(&xsk->tx, descs, rcvd);
                lassert(ret == 0);
                xsk->outstanding_tx += rcvd;
        }
}

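/*
 * Setup sequence: raise RLIMIT_MEMLOCK, load the companion *_kern.o
 * object, attach its XDP program to the interface, write the chosen
 * queue id into qidconf_map, create the AF_XDP socket(s) and insert
 * their fds into xsks_map so the XDP program can redirect packets to
 * them, then start the statistics thread and the selected benchmark
 * loop.
 */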
int main(int argc, char **argv)
{
        struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        struct bpf_prog_load_attr prog_load_attr = {
                .prog_type      = BPF_PROG_TYPE_XDP,
        };
        int prog_fd, qidconf_map, xsks_map;
        struct bpf_object *obj;
        char xdp_filename[256];
        struct bpf_map *map;
        int i, ret, key = 0;
        pthread_t pt;

        parse_command_line(argc, argv);

        if (setrlimit(RLIMIT_MEMLOCK, &r)) {
                fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
                        strerror(errno));
                exit(EXIT_FAILURE);
        }

        snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
        prog_load_attr.file = xdp_filename;

        if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
                exit(EXIT_FAILURE);
        if (prog_fd < 0) {
                fprintf(stderr, "ERROR: no program found: %s\n",
                        strerror(prog_fd));
                exit(EXIT_FAILURE);
        }

        map = bpf_object__find_map_by_name(obj, "qidconf_map");
        qidconf_map = bpf_map__fd(map);
        if (qidconf_map < 0) {
                fprintf(stderr, "ERROR: no qidconf map found: %s\n",
                        strerror(qidconf_map));
                exit(EXIT_FAILURE);
        }

        map = bpf_object__find_map_by_name(obj, "xsks_map");
        xsks_map = bpf_map__fd(map);
        if (xsks_map < 0) {
                fprintf(stderr, "ERROR: no xsks map found: %s\n",
                        strerror(xsks_map));
                exit(EXIT_FAILURE);
        }

        if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
                fprintf(stderr, "ERROR: link set xdp fd failed\n");
                exit(EXIT_FAILURE);
        }

        ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0);
        if (ret) {
                fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
                exit(EXIT_FAILURE);
        }

        /* Create sockets... */
        xsks[num_socks++] = xsk_configure(NULL);

#if RR_LB
        for (i = 0; i < MAX_SOCKS - 1; i++)
                xsks[num_socks++] = xsk_configure(xsks[0]->umem);
#endif

        /* ...and insert them into the map. */
        for (i = 0; i < num_socks; i++) {
                key = i;
                ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0);
                if (ret) {
                        fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
                        exit(EXIT_FAILURE);
                }
        }

        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
        signal(SIGABRT, int_exit);

        setlocale(LC_ALL, "");

        ret = pthread_create(&pt, NULL, poller, NULL);
        lassert(ret == 0);

        prev_time = get_nsecs();

        if (opt_bench == BENCH_RXDROP)
                rx_drop_all();
        else if (opt_bench == BENCH_TXONLY)
                tx_only(xsks[0]);
        else
                l2fwd(xsks[0]);

        return 0;
}

