~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/seccomp/seccomp_bpf.c

Version: ~ [ linux-6.1-rc7 ] ~ [ linux-6.0.10 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.80 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.156 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.225 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.267 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.300 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.334 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.302 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
  4  *
  5  * Test code for seccomp bpf.
  6  */
  7 
  8 #define _GNU_SOURCE
  9 #include <sys/types.h>
 10 
 11 /*
 12  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 13  * we need to use the kernel's siginfo.h file and trick glibc
 14  * into accepting it.
 15  */
 16 #if !__GLIBC_PREREQ(2, 26)
 17 # include <asm/siginfo.h>
 18 # define __have_siginfo_t 1
 19 # define __have_sigval_t 1
 20 # define __have_sigevent_t 1
 21 #endif
 22 
 23 #include <errno.h>
 24 #include <linux/filter.h>
 25 #include <sys/prctl.h>
 26 #include <sys/ptrace.h>
 27 #include <sys/user.h>
 28 #include <linux/prctl.h>
 29 #include <linux/ptrace.h>
 30 #include <linux/seccomp.h>
 31 #include <pthread.h>
 32 #include <semaphore.h>
 33 #include <signal.h>
 34 #include <stddef.h>
 35 #include <stdbool.h>
 36 #include <string.h>
 37 #include <time.h>
 38 #include <limits.h>
 39 #include <linux/elf.h>
 40 #include <sys/uio.h>
 41 #include <sys/utsname.h>
 42 #include <sys/fcntl.h>
 43 #include <sys/mman.h>
 44 #include <sys/times.h>
 45 #include <sys/socket.h>
 46 #include <sys/ioctl.h>
 47 #include <linux/kcmp.h>
 48 
 49 #include <unistd.h>
 50 #include <sys/syscall.h>
 51 #include <poll.h>
 52 
 53 #include "../kselftest_harness.h"
 54 
 55 #ifndef PR_SET_PTRACER
    /* Fallback for headers that do not define PR_SET_PTRACER. */
 56 # define PR_SET_PTRACER 0x59616d61
 57 #endif
 58 
 59 #ifndef PR_SET_NO_NEW_PRIVS
    /* Fallback prctl() options used by the tests to set/query no_new_privs. */
 60 #define PR_SET_NO_NEW_PRIVS 38
 61 #define PR_GET_NO_NEW_PRIVS 39
 62 #endif
 63 
 64 #ifndef PR_SECCOMP_EXT
    /* NOTE(review): PR_SECCOMP_EXT is not referenced in the visible part of the file. */
 65 #define PR_SECCOMP_EXT 43
 66 #endif
 67 
 68 #ifndef SECCOMP_EXT_ACT
    /* NOTE(review): SECCOMP_EXT_ACT is not referenced in the visible part of the file. */
 69 #define SECCOMP_EXT_ACT 1
 70 #endif
 71 
 72 #ifndef SECCOMP_EXT_ACT_TSYNC
    /* NOTE(review): SECCOMP_EXT_ACT_TSYNC is not referenced in the visible part of the file. */
 73 #define SECCOMP_EXT_ACT_TSYNC 1
 74 #endif
 75 
 76 #ifndef SECCOMP_MODE_STRICT
    /* Mode value passed to prctl(PR_SET_SECCOMP, ...) by the strict-mode tests. */
 77 #define SECCOMP_MODE_STRICT 1
 78 #endif
 79 
 80 #ifndef SECCOMP_MODE_FILTER
    /* Mode value passed to prctl(PR_SET_SECCOMP, ...) when installing a BPF filter. */
 81 #define SECCOMP_MODE_FILTER 2
 82 #endif
 83 
 84 #ifndef SECCOMP_RET_ALLOW
    /*
     * Local definition of the data block the filter programs read from.
     * It is only compiled when the headers did not define SECCOMP_RET_ALLOW
     * (the guard tests that macro, not the struct itself).
     */
 85 struct seccomp_data {
    /* Loaded via offsetof(struct seccomp_data, nr) and compared with __NR_* values. */
 86         int nr;
 87         __u32 arch;
 88         __u64 instruction_pointer;
    /* Six 64-bit argument slots; syscall_arg() computes offsets into this array. */
 89         __u64 args[6];
 90 };
 91 #endif
 92 
 93 #ifndef SECCOMP_RET_KILL_PROCESS
    /* Fallback filter return values for headers that lack the process/thread split. */
 94 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
 95 #define SECCOMP_RET_KILL_THREAD  0x00000000U /* kill the thread */
 96 #endif
 97 #ifndef SECCOMP_RET_KILL
    /*
     * The whole group below is keyed on SECCOMP_RET_KILL alone: TRAP, ERRNO,
     * TRACE and ALLOW are only defined here when SECCOMP_RET_KILL is missing.
     */
 98 #define SECCOMP_RET_KILL         SECCOMP_RET_KILL_THREAD
 99 #define SECCOMP_RET_TRAP         0x00030000U /* disallow and force a SIGSYS */
100 #define SECCOMP_RET_ERRNO        0x00050000U /* returns an errno */
101 #define SECCOMP_RET_TRACE        0x7ff00000U /* pass to a tracer or disallow */
102 #define SECCOMP_RET_ALLOW        0x7fff0000U /* allow */
103 #endif
104 #ifndef SECCOMP_RET_LOG
105 #define SECCOMP_RET_LOG          0x7ffc0000U /* allow after logging */
106 #endif
107 
/*
 * Per-architecture fallback for the seccomp syscall number, used only when
 * the included headers do not define __NR_seccomp. Unknown architectures
 * get a compile-time warning and the placeholder value 0xffff.
 */
108 #ifndef __NR_seccomp
109 # if defined(__i386__)
110 #  define __NR_seccomp 354
111 # elif defined(__x86_64__)
112 #  define __NR_seccomp 317
113 # elif defined(__arm__)
114 #  define __NR_seccomp 383
115 # elif defined(__aarch64__)
116 #  define __NR_seccomp 277
117 # elif defined(__riscv)
118 #  define __NR_seccomp 277
119 # elif defined(__hppa__)
120 #  define __NR_seccomp 338
121 # elif defined(__powerpc__)
122 #  define __NR_seccomp 358
123 # elif defined(__s390__)
124 #  define __NR_seccomp 348
125 # else
126 #  warning "seccomp syscall number unknown for this architecture"
127 #  define __NR_seccomp 0xffff
128 # endif
129 #endif
130 
/*
 * Fallback values for the "op" argument of the seccomp() wrapper defined
 * later in this file (SECCOMP_SET_* / SECCOMP_GET_*), each guarded
 * individually so newer headers take precedence.
 */
131 #ifndef SECCOMP_SET_MODE_STRICT
132 #define SECCOMP_SET_MODE_STRICT 0
133 #endif
134 
135 #ifndef SECCOMP_SET_MODE_FILTER
136 #define SECCOMP_SET_MODE_FILTER 1
137 #endif
138 
139 #ifndef SECCOMP_GET_ACTION_AVAIL
140 #define SECCOMP_GET_ACTION_AVAIL 2
141 #endif
142 
143 #ifndef SECCOMP_GET_NOTIF_SIZES
144 #define SECCOMP_GET_NOTIF_SIZES 3
145 #endif
146 
/* Fallback bit values for the seccomp filter flags (bits 0-2). */
147 #ifndef SECCOMP_FILTER_FLAG_TSYNC
148 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
149 #endif
150 
151 #ifndef SECCOMP_FILTER_FLAG_LOG
152 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
153 #endif
154 
155 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
156 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
157 #endif
158 
/*
 * Fallback for headers without PTRACE_SECCOMP_GET_METADATA: defines the
 * request number and the structure that goes with it.
 */
159 #ifndef PTRACE_SECCOMP_GET_METADATA
160 #define PTRACE_SECCOMP_GET_METADATA     0x420d
161 
162 struct seccomp_metadata {
163         __u64 filter_off;       /* Input: which filter */
164         __u64 flags;             /* Output: filter's flags */
165 };
166 #endif
167 
/*
 * Fallback definitions for the user-notification interface. Everything in
 * this group (flag, return action, ioctl numbers and the three structures)
 * is keyed on SECCOMP_FILTER_FLAG_NEW_LISTENER being undefined.
 */
168 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
169 #define SECCOMP_FILTER_FLAG_NEW_LISTENER        (1UL << 3)
170 
171 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
172 
    /* ioctl number builders sharing the '!' magic byte. */
173 #define SECCOMP_IOC_MAGIC               '!'
174 #define SECCOMP_IO(nr)                  _IO(SECCOMP_IOC_MAGIC, nr)
175 #define SECCOMP_IOR(nr, type)           _IOR(SECCOMP_IOC_MAGIC, nr, type)
176 #define SECCOMP_IOW(nr, type)           _IOW(SECCOMP_IOC_MAGIC, nr, type)
177 #define SECCOMP_IOWR(nr, type)          _IOWR(SECCOMP_IOC_MAGIC, nr, type)
178 
179 /* Flags for seccomp notification fd ioctl. */
180 #define SECCOMP_IOCTL_NOTIF_RECV        SECCOMP_IOWR(0, struct seccomp_notif)
181 #define SECCOMP_IOCTL_NOTIF_SEND        SECCOMP_IOWR(1, \
182                                                 struct seccomp_notif_resp)
183 #define SECCOMP_IOCTL_NOTIF_ID_VALID    SECCOMP_IOW(2, __u64)
184 
    /* Payload type named by SECCOMP_IOCTL_NOTIF_RECV; embeds a struct seccomp_data. */
185 struct seccomp_notif {
186         __u64 id;
187         __u32 pid;
188         __u32 flags;
189         struct seccomp_data data;
190 };
191 
    /* Payload type named by SECCOMP_IOCTL_NOTIF_SEND. */
192 struct seccomp_notif_resp {
193         __u64 id;
194         __s64 val;
195         __s32 error;
196         __u32 flags;
197 };
198 
    /* Three 16-bit fields named after the structures above; presumably their sizes (see SECCOMP_GET_NOTIF_SIZES) - confirm. */
199 struct seccomp_notif_sizes {
200         __u16 seccomp_notif;
201         __u16 seccomp_notif_resp;
202         __u16 seccomp_data;
203 };
204 #endif
205 
/* Fallback ptrace event-message values distinguishing syscall entry from exit. */
206 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
207 #define PTRACE_EVENTMSG_SYSCALL_ENTRY   1
208 #define PTRACE_EVENTMSG_SYSCALL_EXIT    2
209 #endif
210 
/* Fallback for the user-notification response flag. */
211 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
212 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
213 #endif
214 
/* Fallback for filter flag bit 4. */
215 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
216 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
217 #endif
218 
/*
 * seccomp() - thin wrapper around the raw seccomp syscall.
 * @op:    operation selector (the SECCOMP_SET_* / SECCOMP_GET_* values)
 * @flags: flags forwarded unchanged to the syscall
 * @args:  operation-specific pointer forwarded unchanged to the syscall
 *
 * Returns whatever syscall(__NR_seccomp, ...) returns, converted to int.
 * The wrapper is only compiled when no macro named "seccomp" exists.
 */
219 #ifndef seccomp
220 int seccomp(unsigned int op, unsigned int flags, void *args)
221 {
            /* Clear errno first so a stale value is not mistaken for this call's result. */
222         errno = 0;
223         return syscall(__NR_seccomp, op, flags, args);
224 }
225 #endif
226 
/*
 * syscall_arg(_n) - byte offset, inside struct seccomp_data, of a 32-bit
 * word of args[_n] for use with a 32-bit BPF load. On little-endian builds
 * this is the start of the 64-bit slot; on big-endian builds it is the
 * second __u32 of the slot, so both variants address the low 32 bits.
 * Any other byte order is a compile-time error.
 */
227 #if __BYTE_ORDER == __LITTLE_ENDIAN
228 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
229 #elif __BYTE_ORDER == __BIG_ENDIAN
230 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
231 #else
232 #error "wut? Unknown __BYTE_ORDER?!"
233 #endif
234 
/*
 * Sentinel thread return values. kill_thread() returns UNKILLED when it
 * takes the harmless path and FAILURE when it survived a syscall that
 * should have been fatal. NEWPRIVS is not used in the visible part of
 * the file.
 */
235 #define SIBLING_EXIT_UNKILLED   0xbadbeef
236 #define SIBLING_EXIT_FAILURE    0xbadface
237 #define SIBLING_EXIT_NEWPRIVS   0xbadfeed
238 
/*
 * Enters SECCOMP_MODE_STRICT via prctl() and asserts that the call
 * returns 0; a failure is logged as missing CONFIG_SECCOMP support.
 */
239 TEST(mode_strict_support)
240 {
241         long ret;
242 
243         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
244         ASSERT_EQ(0, ret) {
245                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
246         }
            /*
             * Leave through the raw exit syscall instead of returning;
             * presumably because strict mode forbids whatever a normal
             * return would call - confirm.
             */
247         syscall(__NR_exit, 0);
248 }
249 
/*
 * Enters strict mode and then issues a raw prctl() syscall. The test is
 * declared with TEST_SIGNAL(..., SIGKILL), i.e. it is expected to end with
 * SIGKILL rather than reach the statements after the syscall.
 */
250 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
251 {
252         long ret;
253 
254         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
255         ASSERT_EQ(0, ret) {
256                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
257         }
            /* This syscall is the one that is expected to be fatal. */
258         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
259                 NULL, NULL, NULL);
            /* Always-failing expectation: only evaluated if the syscall returned. */
260         EXPECT_FALSE(true) {
261                 TH_LOG("Unreachable!");
262         }
263 }
264 
265 /* Note! This doesn't test no new privs behavior */
/* It only checks that prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) returns 0. */
266 TEST(no_new_privs_support)
267 {
268         long ret;
269 
270         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
271         EXPECT_EQ(0, ret) {
272                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
273         }
274 }
275 
276 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
277 TEST(mode_filter_support)
278 {
279         long ret;
280 
281         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
282         ASSERT_EQ(0, ret) {
283                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
284         }
            /* A NULL filter program must be rejected with -1 / EFAULT. */
285         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
286         EXPECT_EQ(-1, ret);
287         EXPECT_EQ(EFAULT, errno) {
288                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
289         }
290 }
291 
/*
 * Installs an allow-everything filter without first setting no_new_privs.
 * The expected outcome depends on the effective uid: non-root callers
 * must get -1 / EACCES, root must get 0.
 */
292 TEST(mode_filter_without_nnp)
293 {
294         struct sock_filter filter[] = {
295                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
296         };
297         struct sock_fprog prog = {
298                 .len = (unsigned short)ARRAY_SIZE(filter),
299                 .filter = filter,
300         };
301         long ret;
302 
            /* Any non-negative result from the query is accepted here. */
303         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
304         ASSERT_LE(0, ret) {
305                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
306         }
            /* Reset errno so the EACCES check below reflects only the next call. */
307         errno = 0;
308         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
309         /* Succeeds with CAP_SYS_ADMIN, fails without */
310         /* TODO(wad) check caps not euid */
311         if (geteuid()) {
312                 EXPECT_EQ(-1, ret);
313                 EXPECT_EQ(EACCES, errno);
314         } else {
315                 EXPECT_EQ(0, ret);
316         }
317 }
318 
/* Upper bound on the number of filter installs attempted by filter_chain_limits. */
319 #define MAX_INSNS_PER_PATH 32768
320 
321 TEST(filter_size_limits)
322 {
323         int i;
324         int count = BPF_MAXINSNS + 1;
325         struct sock_filter allow[] = {
326                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
327         };
328         struct sock_filter *filter;
329         struct sock_fprog prog = { };
330         long ret;
331 
332         filter = calloc(count, sizeof(*filter));
333         ASSERT_NE(NULL, filter);
334 
335         for (i = 0; i < count; i++)
336                 filter[i] = allow[0];
337 
338         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
339         ASSERT_EQ(0, ret);
340 
341         prog.filter = filter;
342         prog.len = count;
343 
344         /* Too many filter instructions in a single filter. */
345         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
346         ASSERT_NE(0, ret) {
347                 TH_LOG("Installing %d insn filter was allowed", prog.len);
348         }
349 
350         /* One less is okay, though. */
351         prog.len -= 1;
352         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
353         ASSERT_EQ(0, ret) {
354                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
355         }
356 }
357 
358 TEST(filter_chain_limits)
359 {
360         int i;
361         int count = BPF_MAXINSNS;
362         struct sock_filter allow[] = {
363                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
364         };
365         struct sock_filter *filter;
366         struct sock_fprog prog = { };
367         long ret;
368 
369         filter = calloc(count, sizeof(*filter));
370         ASSERT_NE(NULL, filter);
371 
372         for (i = 0; i < count; i++)
373                 filter[i] = allow[0];
374 
375         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
376         ASSERT_EQ(0, ret);
377 
378         prog.filter = filter;
379         prog.len = 1;
380 
381         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
382         ASSERT_EQ(0, ret);
383 
384         prog.len = count;
385 
386         /* Too many total filter instructions. */
387         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
388                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
389                 if (ret != 0)
390                         break;
391         }
392         ASSERT_NE(0, ret) {
393                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
394                        i, count, i * (count + 4));
395         }
396 }
397 
/*
 * After an allow-everything filter is installed, a request to switch to
 * SECCOMP_MODE_STRICT must fail with -1 / EINVAL.
 */
398 TEST(mode_filter_cannot_move_to_strict)
399 {
400         struct sock_filter filter[] = {
401                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
402         };
403         struct sock_fprog prog = {
404                 .len = (unsigned short)ARRAY_SIZE(filter),
405                 .filter = filter,
406         };
407         long ret;
408 
409         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
410         ASSERT_EQ(0, ret);
411 
412         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
413         ASSERT_EQ(0, ret);
414 
415         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
416         EXPECT_EQ(-1, ret);
417         EXPECT_EQ(EINVAL, errno);
418 }
419 
420 
/*
 * Checks what prctl(PR_GET_SECCOMP) reports: 0 before any filter is
 * installed and 2 afterwards.
 */
421 TEST(mode_filter_get_seccomp)
422 {
423         struct sock_filter filter[] = {
424                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
425         };
426         struct sock_fprog prog = {
427                 .len = (unsigned short)ARRAY_SIZE(filter),
428                 .filter = filter,
429         };
430         long ret;
431 
432         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
433         ASSERT_EQ(0, ret);
434 
435         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
436         EXPECT_EQ(0, ret);
437 
438         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
439         ASSERT_EQ(0, ret);
440 
441         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
            /* 2 is the numeric value of SECCOMP_MODE_FILTER (see its fallback define). */
442         EXPECT_EQ(2, ret);
443 }
444 
445 
/*
 * Installs a one-instruction filter that returns SECCOMP_RET_ALLOW for
 * every syscall and asserts that installation succeeds.
 */
446 TEST(ALLOW_all)
447 {
448         struct sock_filter filter[] = {
449                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
450         };
451         struct sock_fprog prog = {
452                 .len = (unsigned short)ARRAY_SIZE(filter),
453                 .filter = filter,
454         };
455         long ret;
456 
457         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
458         ASSERT_EQ(0, ret);
459 
            /* Note: only three prctl() arguments are passed here, unlike the earlier tests. */
460         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
461         ASSERT_EQ(0, ret);
462 }
463 
/*
 * Attempts to install a filter program with no instructions (the array
 * initializer is empty, so prog.len is 0) and expects -1 / EINVAL.
 */
464 TEST(empty_prog)
465 {
466         struct sock_filter filter[] = {
467         };
468         struct sock_fprog prog = {
469                 .len = (unsigned short)ARRAY_SIZE(filter),
470                 .filter = filter,
471         };
472         long ret;
473 
474         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
475         ASSERT_EQ(0, ret);
476 
477         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
478         EXPECT_EQ(-1, ret);
479         EXPECT_EQ(EINVAL, errno);
480 }
481 
/*
 * Installs a filter that returns SECCOMP_RET_LOG for every syscall and
 * checks that a raw getppid() still returns the parent pid recorded
 * before the filter was installed.
 */
482 TEST(log_all)
483 {
484         struct sock_filter filter[] = {
485                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
486         };
487         struct sock_fprog prog = {
488                 .len = (unsigned short)ARRAY_SIZE(filter),
489                 .filter = filter,
490         };
491         long ret;
            /* Reference value captured before the filter is active. */
492         pid_t parent = getppid();
493 
494         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
495         ASSERT_EQ(0, ret);
496 
497         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
498         ASSERT_EQ(0, ret);
499 
500         /* getppid() should succeed and be logged (no check for logging) */
501         EXPECT_EQ(parent, syscall(__NR_getppid));
502 }
503 
/*
 * Installs a filter whose return value, 0x10000000, matches none of the
 * SECCOMP_RET_* actions defined in this file. The test is declared with
 * TEST_SIGNAL(..., SIGSYS): the getpid() syscall made after installation
 * is expected to be fatal rather than return.
 */
504 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
505 {
506         struct sock_filter filter[] = {
507                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
508         };
509         struct sock_fprog prog = {
510                 .len = (unsigned short)ARRAY_SIZE(filter),
511                 .filter = filter,
512         };
513         long ret;
514 
515         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
516         ASSERT_EQ(0, ret);
517 
518         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
519         ASSERT_EQ(0, ret);
520         EXPECT_EQ(0, syscall(__NR_getpid)) {
521                 TH_LOG("getpid() shouldn't ever return");
522         }
523 }
524 
525 /*
     * Return codes above 0x80000000 match no action defined in this file
     * (0x80000000 itself is SECCOMP_RET_KILL_PROCESS). The filter below
     * returns 0x90000000 and the following getpid() is expected to be fatal.
     */
526 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
527 {
528         struct sock_filter filter[] = {
529                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
530         };
531         struct sock_fprog prog = {
532                 .len = (unsigned short)ARRAY_SIZE(filter),
533                 .filter = filter,
534         };
535         long ret;
536 
537         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
538         ASSERT_EQ(0, ret);
539 
540         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
541         ASSERT_EQ(0, ret);
542         EXPECT_EQ(0, syscall(__NR_getpid)) {
543                 TH_LOG("getpid() shouldn't ever return");
544         }
545 }
546 
/*
 * Installs a filter that returns SECCOMP_RET_KILL for every syscall. No
 * syscall is issued explicitly afterwards; the test is declared with
 * TEST_SIGNAL(..., SIGSYS), so it is presumably the next syscall made on
 * the way out that is fatal - confirm against the harness.
 */
547 TEST_SIGNAL(KILL_all, SIGSYS)
548 {
549         struct sock_filter filter[] = {
550                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
551         };
552         struct sock_fprog prog = {
553                 .len = (unsigned short)ARRAY_SIZE(filter),
554                 .filter = filter,
555         };
556         long ret;
557 
558         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
559         ASSERT_EQ(0, ret);
560 
561         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
562         ASSERT_EQ(0, ret);
563 }
564 
/*
 * Installs a filter that kills only on getpid() and allows everything
 * else, then checks that getppid() still works before issuing the fatal
 * getpid(). Declared with TEST_SIGNAL(..., SIGSYS).
 */
565 TEST_SIGNAL(KILL_one, SIGSYS)
566 {
567         struct sock_filter filter[] = {
                    /* Load the syscall number. */
568                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
569                         offsetof(struct seccomp_data, nr)),
                    /* getpid: fall through to KILL; anything else: skip one to ALLOW. */
570                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
571                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
572                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
573         };
574         struct sock_fprog prog = {
575                 .len = (unsigned short)ARRAY_SIZE(filter),
576                 .filter = filter,
577         };
578         long ret;
579         pid_t parent = getppid();
580 
581         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
582         ASSERT_EQ(0, ret);
583 
584         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
585         ASSERT_EQ(0, ret);
586 
587         EXPECT_EQ(parent, syscall(__NR_getppid));
588         /* getpid() should never return. */
589         EXPECT_EQ(0, syscall(__NR_getpid));
590 }
591 
/*
 * Installs a filter that kills times() only when its first argument
 * matches the (low 32 bits of the) address of the local fatal_address;
 * every other syscall, and times() with any other argument, is allowed.
 * Declared with TEST_SIGNAL(..., SIGSYS).
 */
592 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
593 {
            /* Only the address of this variable is used, as the "fatal" argument value. */
594         void *fatal_address;
595         struct sock_filter filter[] = {
596                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
597                         offsetof(struct seccomp_data, nr)),
                    /* times: skip one to the argument check; anything else: ALLOW. */
598                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
599                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
600                 /* Only bother with lower 32-bit for now. */
601                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
602                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
603                         (unsigned long)&fatal_address, 0, 1),
604                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
605                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
606         };
607         struct sock_fprog prog = {
608                 .len = (unsigned short)ARRAY_SIZE(filter),
609                 .filter = filter,
610         };
611         long ret;
612         pid_t parent = getppid();
613         struct tms timebuf;
            /* Baseline taken before the filter is installed. */
614         clock_t clock = times(&timebuf);
615 
616         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
617         ASSERT_EQ(0, ret);
618 
619         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
620         ASSERT_EQ(0, ret);
621 
622         EXPECT_EQ(parent, syscall(__NR_getppid));
            /* times() with a harmless argument must still work. */
623         EXPECT_LE(clock, syscall(__NR_times, &timebuf));
624         /* times() should never return. */
625         EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
626 }
627 
/*
 * Installs a filter that kills the mmap syscall (mmap2 where that number
 * exists) only when its sixth argument equals 0x0C0FFEE. One mapping with
 * a different sixth argument must succeed; the second call with the
 * magic value is expected to be fatal. Declared with
 * TEST_SIGNAL(..., SIGSYS).
 */
628 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
629 {
630 #ifndef __NR_mmap2
631         int sysno = __NR_mmap;
632 #else
633         int sysno = __NR_mmap2;
634 #endif
635         struct sock_filter filter[] = {
636                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
637                         offsetof(struct seccomp_data, nr)),
                    /* Selected mmap variant: skip one to the argument check; else ALLOW. */
638                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
639                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
640                 /* Only bother with lower 32-bit for now. */
641                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
642                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
643                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
644                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
645         };
646         struct sock_fprog prog = {
647                 .len = (unsigned short)ARRAY_SIZE(filter),
648                 .filter = filter,
649         };
650         long ret;
651         pid_t parent = getppid();
652         int fd;
653         void *map1, *map2;
654         int page_size = sysconf(_SC_PAGESIZE);
655 
656         ASSERT_LT(0, page_size);
657 
658         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
659         ASSERT_EQ(0, ret);
660 
661         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
662         ASSERT_EQ(0, ret);
663 
664         fd = open("/dev/zero", O_RDONLY);
665         ASSERT_NE(-1, fd);
666 
667         EXPECT_EQ(parent, syscall(__NR_getppid));
            /* Sixth argument is page_size here, so the filter allows the call. */
668         map1 = (void *)syscall(sysno,
669                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
670         EXPECT_NE(MAP_FAILED, map1);
671         /* mmap2() should never return. */
672         map2 = (void *)syscall(sysno,
673                  NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
674         EXPECT_EQ(MAP_FAILED, map2);
675 
676         /* The test failed, so clean up the resources. */
677         munmap(map1, page_size);
678         munmap(map2, page_size);
679         close(fd);
680 }
681 
682 /* This is a thread task to die via seccomp filter violation. */
683 void *kill_thread(void *data)
684 {
685         bool die = (bool)data;
686 
687         if (die) {
688                 prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
689                 return (void *)SIBLING_EXIT_FAILURE;
690         }
691 
692         return (void *)SIBLING_EXIT_UNKILLED;
693 }
694 
695 /* Prepare a thread that will kill itself or both of us. */
/*
 * @_metadata:    harness context required by the ASSERT_* macros
 * @kill_process: true installs the KILL_PROCESS filter first, false the
 *                KILL_THREAD filter; the KILL_THREAD filter is then always
 *                stacked on top.
 *
 * Does not return: ends in exit(42) if this thread is still running after
 * the second helper thread has been joined.
 */
696 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
697 {
698         pthread_t thread;
699         void *status;
700         /* Kill only when calling __NR_prctl. */
701         struct sock_filter filter_thread[] = {
702                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
703                         offsetof(struct seccomp_data, nr)),
704                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
705                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
706                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
707         };
708         struct sock_fprog prog_thread = {
709                 .len = (unsigned short)ARRAY_SIZE(filter_thread),
710                 .filter = filter_thread,
711         };
            /* Same shape as filter_thread, but returning KILL_PROCESS for prctl. */
712         struct sock_filter filter_process[] = {
713                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
714                         offsetof(struct seccomp_data, nr)),
715                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
716                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
717                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
718         };
719         struct sock_fprog prog_process = {
720                 .len = (unsigned short)ARRAY_SIZE(filter_process),
721                 .filter = filter_process,
722         };
723 
724         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
725                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
726         }
727 
            /* Filters are installed via seccomp(), not prctl(), from here on. */
728         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
729                              kill_process ? &prog_process : &prog_thread));
730 
731         /*
732          * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
733          * flag cannot be downgraded by a new filter.
734          */
735         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
736 
737         /* Start a thread that will exit immediately. */
738         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
739         ASSERT_EQ(0, pthread_join(thread, &status));
740         ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
741 
742         /* Start a thread that will die immediately. */
743         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
744         ASSERT_EQ(0, pthread_join(thread, &status));
            /* SIBLING_EXIT_FAILURE would mean the thread survived its prctl() call. */
745         ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
746 
747         /*
748          * If we get here, only the spawned thread died. Let the parent know
749          * the whole process didn't die (i.e. this thread, the spawner,
750          * stayed running).
751          */
752         exit(42);
753 }
754 
/*
 * Forks a child that runs kill_thread_or_group(..., false) and asserts
 * that the child exits normally with status 42.
 */
755 TEST(KILL_thread)
756 {
757         int status;
758         pid_t child_pid;
759 
760         child_pid = fork();
761         ASSERT_LE(0, child_pid);
762         if (child_pid == 0) {
763                 kill_thread_or_group(_metadata, false);
                    /* Only reached if the helper returned; 38 is distinct from the 42 checked below. */
764                 _exit(38);
765         }
766 
767         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
768 
769         /* If only the thread was killed, we'll see exit 42. */
770         ASSERT_TRUE(WIFEXITED(status));
771         ASSERT_EQ(42, WEXITSTATUS(status));
772 }
773 
/*
 * Forks a child that runs kill_thread_or_group(..., true) and asserts
 * that the child was terminated by SIGSYS.
 */
774 TEST(KILL_process)
775 {
776         int status;
777         pid_t child_pid;
778 
779         child_pid = fork();
780         ASSERT_LE(0, child_pid);
781         if (child_pid == 0) {
782                 kill_thread_or_group(_metadata, true);
                    /* Only reached if the helper returned. */
783                 _exit(38);
784         }
785 
786         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
787 
788         /* If the entire process was killed, we'll see SIGSYS. */
789         ASSERT_TRUE(WIFSIGNALED(status));
790         ASSERT_EQ(SIGSYS, WTERMSIG(status));
791 }
792 
793 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
/*
 * Tries to install a filter that loads syscall_arg(6). struct seccomp_data
 * has only six argument slots (indices 0-5), so this offset lies past the
 * args array; installation is expected to fail with -1 / EINVAL.
 */
794 TEST(arg_out_of_range)
795 {
796         struct sock_filter filter[] = {
797                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
798                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
799         };
800         struct sock_fprog prog = {
801                 .len = (unsigned short)ARRAY_SIZE(filter),
802                 .filter = filter,
803         };
804         long ret;
805 
806         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
807         ASSERT_EQ(0, ret);
808 
809         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
810         EXPECT_EQ(-1, ret);
811         EXPECT_EQ(EINVAL, errno);
812 }
813 
/*
 * ERRNO_FILTER(name, errno_val) - declare a filter that makes read() fail.
 * @name:      suffix for the two objects declared: the instruction array
 *             _read_filter_<name> and the program prog_<name>
 * @errno_val: value OR-ed into SECCOMP_RET_ERRNO for the read() syscall
 *
 * The generated filter returns SECCOMP_RET_ERRNO | errno_val for __NR_read
 * and SECCOMP_RET_ALLOW for every other syscall. The expansion ends
 * without a semicolon, so the invocation supplies it.
 *
 * The parameter is deliberately not called "errno" (that name belongs to
 * the <errno.h> macro) and is parenthesized so that any expression can be
 * passed safely.
 */
#define ERRNO_FILTER(name, errno_val)                                   \
        struct sock_filter _read_filter_##name[] = {                    \
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,                          \
                        offsetof(struct seccomp_data, nr)),             \
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),       \
                BPF_STMT(BPF_RET|BPF_K,                                 \
                        SECCOMP_RET_ERRNO | (errno_val)),               \
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),             \
        };                                                              \
        struct sock_fprog prog_##name = {                               \
                .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \
                .filter = _read_filter_##name,                          \
        }
826 
827 /* Make sure basic errno values are correctly passed through a filter. */
828 TEST(ERRNO_valid)
829 {
            /* Declares prog_valid: read() fails with E2BIG, everything else is allowed. */
830         ERRNO_FILTER(valid, E2BIG);
831         long ret;
832         pid_t parent = getppid();
833 
834         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
835         ASSERT_EQ(0, ret);
836 
837         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
838         ASSERT_EQ(0, ret);
839 
            /* A syscall other than read() must be unaffected. */
840         EXPECT_EQ(parent, syscall(__NR_getppid));
841         EXPECT_EQ(-1, read(0, NULL, 0));
842         EXPECT_EQ(E2BIG, errno);
843 }
844 
845 /* Make sure an errno of zero is correctly handled by the arch code. */
846 TEST(ERRNO_zero)
847 {
            /* Declares prog_zero: read() gets SECCOMP_RET_ERRNO with a data value of 0. */
848         ERRNO_FILTER(zero, 0);
849         long ret;
850         pid_t parent = getppid();
851 
852         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
853         ASSERT_EQ(0, ret);
854 
855         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
856         ASSERT_EQ(0, ret);
857 
858         EXPECT_EQ(parent, syscall(__NR_getppid));
859         /* "errno" of 0 is ok. */
860         EXPECT_EQ(0, read(0, NULL, 0));
861 }
862 
863 /*
864  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
865  * This tests that the errno value gets capped correctly, fixed by
866  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
867  */
868 TEST(ERRNO_capped)
869 {
            /* Requests 4096; the check below expects read() to report 4095 instead. */
870         ERRNO_FILTER(capped, 4096);
871         long ret;
872         pid_t parent = getppid();
873 
874         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
875         ASSERT_EQ(0, ret);
876 
877         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
878         ASSERT_EQ(0, ret);
879 
880         EXPECT_EQ(parent, syscall(__NR_getppid));
881         EXPECT_EQ(-1, read(0, NULL, 0));
882         EXPECT_EQ(4095, errno);
883 }
884 
885 /*
886  * Filters are processed in reverse order: last applied is executed first.
887  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
888  * SECCOMP_RET_DATA mask results will follow the most recently applied
889  * matching filter return (and not the lowest or highest value).
890  */
891 TEST(ERRNO_order)
892 {
            /*
             * Three filters with errno data 11, 13 and 12, installed in that
             * order; 12 is neither the lowest nor the highest but belongs to
             * the filter installed last.
             */
893         ERRNO_FILTER(first,  11);
894         ERRNO_FILTER(second, 13);
895         ERRNO_FILTER(third,  12);
896         long ret;
897         pid_t parent = getppid();
898 
899         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
900         ASSERT_EQ(0, ret);
901 
902         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
903         ASSERT_EQ(0, ret);
904 
905         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
906         ASSERT_EQ(0, ret);
907 
908         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
909         ASSERT_EQ(0, ret);
910 
911         EXPECT_EQ(parent, syscall(__NR_getppid));
912         EXPECT_EQ(-1, read(0, NULL, 0));
913         EXPECT_EQ(12, errno);
914 }
915 
/* Per-test state for the TRAP tests: the filter program built by FIXTURE_SETUP(TRAP). */
916 FIXTURE(TRAP) {
917         struct sock_fprog prog;
918 };
919 
/*
 * Builds self->prog: a heap copy of a filter that returns SECCOMP_RET_TRAP
 * for getpid() and SECCOMP_RET_ALLOW for every other syscall. The filter
 * is not installed here; each test does that itself.
 */
920 FIXTURE_SETUP(TRAP)
921 {
922         struct sock_filter filter[] = {
923                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
924                         offsetof(struct seccomp_data, nr)),
                    /* getpid: fall through to TRAP; anything else: skip one to ALLOW. */
925                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
926                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
927                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
928         };
929 
930         memset(&self->prog, 0, sizeof(self->prog));
            /* The local array goes out of scope on return, so keep a heap copy. */
931         self->prog.filter = malloc(sizeof(filter));
932         ASSERT_NE(NULL, self->prog.filter);
933         memcpy(self->prog.filter, filter, sizeof(filter));
934         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
935 }
936 
937 FIXTURE_TEARDOWN(TRAP)
938 {
939         if (self->prog.filter)
940                 free(self->prog.filter);
941 }
942 
943 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
944 {
945         long ret;
946 
947         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
948         ASSERT_EQ(0, ret);
949 
950         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
951         ASSERT_EQ(0, ret);
952         syscall(__NR_getpid);
953 }
954 
955 /* Ensure that SIGSYS overrides SIG_IGN */
956 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
957 {
958         long ret;
959 
960         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
961         ASSERT_EQ(0, ret);
962 
963         signal(SIGSYS, SIG_IGN);
964 
965         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
966         ASSERT_EQ(0, ret);
967         syscall(__NR_getpid);
968 }
969 
/* Last signal number and siginfo seen by TRAP_action(). */
static siginfo_t TRAP_info;
static volatile int TRAP_nr;

/*
 * SA_SIGINFO-style handler: snapshot the siginfo and the signal number
 * so the test body can inspect them after the handler returns.
 */
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	memcpy(&TRAP_info, info, sizeof(*info));
	TRAP_nr = nr;
}
977 
978 TEST_F(TRAP, handler)
979 {
980         int ret, test;
981         struct sigaction act;
982         sigset_t mask;
983 
984         memset(&act, 0, sizeof(act));
985         sigemptyset(&mask);
986         sigaddset(&mask, SIGSYS);
987 
988         act.sa_sigaction = &TRAP_action;
989         act.sa_flags = SA_SIGINFO;
990         ret = sigaction(SIGSYS, &act, NULL);
991         ASSERT_EQ(0, ret) {
992                 TH_LOG("sigaction failed");
993         }
994         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
995         ASSERT_EQ(0, ret) {
996                 TH_LOG("sigprocmask failed");
997         }
998 
999         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1000         ASSERT_EQ(0, ret);
1001         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1002         ASSERT_EQ(0, ret);
1003         TRAP_nr = 0;
1004         memset(&TRAP_info, 0, sizeof(TRAP_info));
1005         /* Expect the registers to be rolled back. (nr = error) may vary
1006          * based on arch. */
1007         ret = syscall(__NR_getpid);
1008         /* Silence gcc warning about volatile. */
1009         test = TRAP_nr;
1010         EXPECT_EQ(SIGSYS, test);
1011         struct local_sigsys {
1012                 void *_call_addr;       /* calling user insn */
1013                 int _syscall;           /* triggering system call number */
1014                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
1015         } *sigsys = (struct local_sigsys *)
1016 #ifdef si_syscall
1017                 &(TRAP_info.si_call_addr);
1018 #else
1019                 &TRAP_info.si_pid;
1020 #endif
1021         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1022         /* Make sure arch is non-zero. */
1023         EXPECT_NE(0, sigsys->_arch);
1024         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1025 }
1026 
1027 FIXTURE(precedence) {
1028         struct sock_fprog allow;
1029         struct sock_fprog log;
1030         struct sock_fprog trace;
1031         struct sock_fprog error;
1032         struct sock_fprog trap;
1033         struct sock_fprog kill;
1034 };
1035 
1036 FIXTURE_SETUP(precedence)
1037 {
1038         struct sock_filter allow_insns[] = {
1039                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1040         };
1041         struct sock_filter log_insns[] = {
1042                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1043                         offsetof(struct seccomp_data, nr)),
1044                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1045                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1046                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1047         };
1048         struct sock_filter trace_insns[] = {
1049                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1050                         offsetof(struct seccomp_data, nr)),
1051                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1052                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1053                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1054         };
1055         struct sock_filter error_insns[] = {
1056                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1057                         offsetof(struct seccomp_data, nr)),
1058                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1059                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1060                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1061         };
1062         struct sock_filter trap_insns[] = {
1063                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1064                         offsetof(struct seccomp_data, nr)),
1065                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1066                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1067                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1068         };
1069         struct sock_filter kill_insns[] = {
1070                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1071                         offsetof(struct seccomp_data, nr)),
1072                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1073                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1074                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1075         };
1076 
1077         memset(self, 0, sizeof(*self));
1078 #define FILTER_ALLOC(_x) \
1079         self->_x.filter = malloc(sizeof(_x##_insns)); \
1080         ASSERT_NE(NULL, self->_x.filter); \
1081         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1082         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1083         FILTER_ALLOC(allow);
1084         FILTER_ALLOC(log);
1085         FILTER_ALLOC(trace);
1086         FILTER_ALLOC(error);
1087         FILTER_ALLOC(trap);
1088         FILTER_ALLOC(kill);
1089 }
1090 
1091 FIXTURE_TEARDOWN(precedence)
1092 {
1093 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1094         FILTER_FREE(allow);
1095         FILTER_FREE(log);
1096         FILTER_FREE(trace);
1097         FILTER_FREE(error);
1098         FILTER_FREE(trap);
1099         FILTER_FREE(kill);
1100 }
1101 
1102 TEST_F(precedence, allow_ok)
1103 {
1104         pid_t parent, res = 0;
1105         long ret;
1106 
1107         parent = getppid();
1108         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1109         ASSERT_EQ(0, ret);
1110 
1111         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1112         ASSERT_EQ(0, ret);
1113         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1114         ASSERT_EQ(0, ret);
1115         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1116         ASSERT_EQ(0, ret);
1117         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1118         ASSERT_EQ(0, ret);
1119         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1120         ASSERT_EQ(0, ret);
1121         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1122         ASSERT_EQ(0, ret);
1123         /* Should work just fine. */
1124         res = syscall(__NR_getppid);
1125         EXPECT_EQ(parent, res);
1126 }
1127 
1128 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1129 {
1130         pid_t parent, res = 0;
1131         long ret;
1132 
1133         parent = getppid();
1134         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1135         ASSERT_EQ(0, ret);
1136 
1137         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1138         ASSERT_EQ(0, ret);
1139         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1140         ASSERT_EQ(0, ret);
1141         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1142         ASSERT_EQ(0, ret);
1143         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1144         ASSERT_EQ(0, ret);
1145         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1146         ASSERT_EQ(0, ret);
1147         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1148         ASSERT_EQ(0, ret);
1149         /* Should work just fine. */
1150         res = syscall(__NR_getppid);
1151         EXPECT_EQ(parent, res);
1152         /* getpid() should never return. */
1153         res = syscall(__NR_getpid);
1154         EXPECT_EQ(0, res);
1155 }
1156 
1157 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1158 {
1159         pid_t parent;
1160         long ret;
1161 
1162         parent = getppid();
1163         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1164         ASSERT_EQ(0, ret);
1165 
1166         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1167         ASSERT_EQ(0, ret);
1168         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1169         ASSERT_EQ(0, ret);
1170         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1171         ASSERT_EQ(0, ret);
1172         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1173         ASSERT_EQ(0, ret);
1174         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1175         ASSERT_EQ(0, ret);
1176         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1177         ASSERT_EQ(0, ret);
1178         /* Should work just fine. */
1179         EXPECT_EQ(parent, syscall(__NR_getppid));
1180         /* getpid() should never return. */
1181         EXPECT_EQ(0, syscall(__NR_getpid));
1182 }
1183 
1184 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1185 {
1186         pid_t parent;
1187         long ret;
1188 
1189         parent = getppid();
1190         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1191         ASSERT_EQ(0, ret);
1192 
1193         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1194         ASSERT_EQ(0, ret);
1195         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1196         ASSERT_EQ(0, ret);
1197         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1198         ASSERT_EQ(0, ret);
1199         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1200         ASSERT_EQ(0, ret);
1201         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1202         ASSERT_EQ(0, ret);
1203         /* Should work just fine. */
1204         EXPECT_EQ(parent, syscall(__NR_getppid));
1205         /* getpid() should never return. */
1206         EXPECT_EQ(0, syscall(__NR_getpid));
1207 }
1208 
1209 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1210 {
1211         pid_t parent;
1212         long ret;
1213 
1214         parent = getppid();
1215         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1216         ASSERT_EQ(0, ret);
1217 
1218         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1219         ASSERT_EQ(0, ret);
1220         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1221         ASSERT_EQ(0, ret);
1222         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1223         ASSERT_EQ(0, ret);
1224         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1225         ASSERT_EQ(0, ret);
1226         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1227         ASSERT_EQ(0, ret);
1228         /* Should work just fine. */
1229         EXPECT_EQ(parent, syscall(__NR_getppid));
1230         /* getpid() should never return. */
1231         EXPECT_EQ(0, syscall(__NR_getpid));
1232 }
1233 
1234 TEST_F(precedence, errno_is_third)
1235 {
1236         pid_t parent;
1237         long ret;
1238 
1239         parent = getppid();
1240         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1241         ASSERT_EQ(0, ret);
1242 
1243         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1244         ASSERT_EQ(0, ret);
1245         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1246         ASSERT_EQ(0, ret);
1247         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1248         ASSERT_EQ(0, ret);
1249         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1250         ASSERT_EQ(0, ret);
1251         /* Should work just fine. */
1252         EXPECT_EQ(parent, syscall(__NR_getppid));
1253         EXPECT_EQ(0, syscall(__NR_getpid));
1254 }
1255 
1256 TEST_F(precedence, errno_is_third_in_any_order)
1257 {
1258         pid_t parent;
1259         long ret;
1260 
1261         parent = getppid();
1262         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1263         ASSERT_EQ(0, ret);
1264 
1265         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1266         ASSERT_EQ(0, ret);
1267         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1268         ASSERT_EQ(0, ret);
1269         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1270         ASSERT_EQ(0, ret);
1271         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1272         ASSERT_EQ(0, ret);
1273         /* Should work just fine. */
1274         EXPECT_EQ(parent, syscall(__NR_getppid));
1275         EXPECT_EQ(0, syscall(__NR_getpid));
1276 }
1277 
1278 TEST_F(precedence, trace_is_fourth)
1279 {
1280         pid_t parent;
1281         long ret;
1282 
1283         parent = getppid();
1284         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1285         ASSERT_EQ(0, ret);
1286 
1287         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1288         ASSERT_EQ(0, ret);
1289         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1290         ASSERT_EQ(0, ret);
1291         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1292         ASSERT_EQ(0, ret);
1293         /* Should work just fine. */
1294         EXPECT_EQ(parent, syscall(__NR_getppid));
1295         /* No ptracer */
1296         EXPECT_EQ(-1, syscall(__NR_getpid));
1297 }
1298 
1299 TEST_F(precedence, trace_is_fourth_in_any_order)
1300 {
1301         pid_t parent;
1302         long ret;
1303 
1304         parent = getppid();
1305         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1306         ASSERT_EQ(0, ret);
1307 
1308         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1309         ASSERT_EQ(0, ret);
1310         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1311         ASSERT_EQ(0, ret);
1312         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1313         ASSERT_EQ(0, ret);
1314         /* Should work just fine. */
1315         EXPECT_EQ(parent, syscall(__NR_getppid));
1316         /* No ptracer */
1317         EXPECT_EQ(-1, syscall(__NR_getpid));
1318 }
1319 
1320 TEST_F(precedence, log_is_fifth)
1321 {
1322         pid_t mypid, parent;
1323         long ret;
1324 
1325         mypid = getpid();
1326         parent = getppid();
1327         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1328         ASSERT_EQ(0, ret);
1329 
1330         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1331         ASSERT_EQ(0, ret);
1332         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1333         ASSERT_EQ(0, ret);
1334         /* Should work just fine. */
1335         EXPECT_EQ(parent, syscall(__NR_getppid));
1336         /* Should also work just fine */
1337         EXPECT_EQ(mypid, syscall(__NR_getpid));
1338 }
1339 
1340 TEST_F(precedence, log_is_fifth_in_any_order)
1341 {
1342         pid_t mypid, parent;
1343         long ret;
1344 
1345         mypid = getpid();
1346         parent = getppid();
1347         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1348         ASSERT_EQ(0, ret);
1349 
1350         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1351         ASSERT_EQ(0, ret);
1352         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1353         ASSERT_EQ(0, ret);
1354         /* Should work just fine. */
1355         EXPECT_EQ(parent, syscall(__NR_getppid));
1356         /* Should also work just fine */
1357         EXPECT_EQ(mypid, syscall(__NR_getpid));
1358 }
1359 
/* Fallback for headers that do not define the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
# define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/*
 * Catch the Ubuntu 12.04 value error: discard any PTRACE_EVENT_SECCOMP
 * macro whose value is not 7 so that it is redefined just below.
 */
#if PTRACE_EVENT_SECCOMP != 7
# undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
# define PTRACE_EVENT_SECCOMP	7
#endif
1372 
/*
 * True when the wait status carries PTRACE_EVENT_SECCOMP in the bits
 * above bit 15.  The argument is parenthesized so that expressions
 * such as IS_SECCOMP_EVENT(a | b) shift the whole value.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)

/*
 * Run flag for the tracer loop.  It is cleared from a signal handler,
 * hence volatile: the loop must re-read it on every iteration.
 */
volatile bool tracer_running;

/* Signal handler: ask the tracer loop to stop. */
void tracer_stop(int sig)
{
	tracer_running = false;
}
1379 
1380 typedef void tracer_func_t(struct __test_metadata *_metadata,
1381                            pid_t tracee, int status, void *args);
1382 
1383 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1384             tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1385 {
1386         int ret = -1;
1387         struct sigaction action = {
1388                 .sa_handler = tracer_stop,
1389         };
1390 
1391         /* Allow external shutdown. */
1392         tracer_running = true;
1393         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1394 
1395         errno = 0;
1396         while (ret == -1 && errno != EINVAL)
1397                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1398         ASSERT_EQ(0, ret) {
1399                 kill(tracee, SIGKILL);
1400         }
1401         /* Wait for attach stop */
1402         wait(NULL);
1403 
1404         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1405                                                       PTRACE_O_TRACESYSGOOD :
1406                                                       PTRACE_O_TRACESECCOMP);
1407         ASSERT_EQ(0, ret) {
1408                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1409                 kill(tracee, SIGKILL);
1410         }
1411         ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1412                      tracee, NULL, 0);
1413         ASSERT_EQ(0, ret);
1414 
1415         /* Unblock the tracee */
1416         ASSERT_EQ(1, write(fd, "A", 1));
1417         ASSERT_EQ(0, close(fd));
1418 
1419         /* Run until we're shut down. Must assert to stop execution. */
1420         while (tracer_running) {
1421                 int status;
1422 
1423                 if (wait(&status) != tracee)
1424                         continue;
1425                 if (WIFSIGNALED(status) || WIFEXITED(status))
1426                         /* Child is dead. Time to go. */
1427                         return;
1428 
1429                 /* Check if this is a seccomp event. */
1430                 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1431 
1432                 tracer_func(_metadata, tracee, status, args);
1433 
1434                 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1435                              tracee, NULL, 0);
1436                 ASSERT_EQ(0, ret);
1437         }
1438         /* Directly report the status of our test harness results. */
1439         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1440 }
1441 
1442 /* Common tracer setup/teardown functions. */
void cont_handler(int num)
{
	/* Intentionally empty: the handler only needs to exist. */
}
1445 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1446                           tracer_func_t func, void *args, bool ptrace_syscall)
1447 {
1448         char sync;
1449         int pipefd[2];
1450         pid_t tracer_pid;
1451         pid_t tracee = getpid();
1452 
1453         /* Setup a pipe for clean synchronization. */
1454         ASSERT_EQ(0, pipe(pipefd));
1455 
1456         /* Fork a child which we'll promote to tracer */
1457         tracer_pid = fork();
1458         ASSERT_LE(0, tracer_pid);
1459         signal(SIGALRM, cont_handler);
1460         if (tracer_pid == 0) {
1461                 close(pipefd[0]);
1462                 start_tracer(_metadata, pipefd[1], tracee, func, args,
1463                              ptrace_syscall);
1464                 syscall(__NR_exit, 0);
1465         }
1466         close(pipefd[1]);
1467         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1468         read(pipefd[0], &sync, 1);
1469         close(pipefd[0]);
1470 
1471         return tracer_pid;
1472 }
1473 void teardown_trace_fixture(struct __test_metadata *_metadata,
1474                             pid_t tracer)
1475 {
1476         if (tracer) {
1477                 int status;
1478                 /*
1479                  * Extract the exit code from the other process and
1480                  * adopt it for ourselves in case its asserts failed.
1481                  */
1482                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1483                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1484                 if (WEXITSTATUS(status))
1485                         _metadata->passed = 0;
1486         }
1487 }
1488 
1489 /* "poke" tracer arguments and function. */
1490 struct tracer_args_poke_t {
1491         unsigned long poke_addr;
1492 };
1493 
1494 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1495                  void *args)
1496 {
1497         int ret;
1498         unsigned long msg;
1499         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1500 
1501         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1502         EXPECT_EQ(0, ret);
1503         /* If this fails, don't try to recover. */
1504         ASSERT_EQ(0x1001, msg) {
1505                 kill(tracee, SIGKILL);
1506         }
1507         /*
1508          * Poke in the message.
1509          * Registers are not touched to try to keep this relatively arch
1510          * agnostic.
1511          */
1512         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1513         EXPECT_EQ(0, ret);
1514 }
1515 
1516 FIXTURE(TRACE_poke) {
1517         struct sock_fprog prog;
1518         pid_t tracer;
1519         long poked;
1520         struct tracer_args_poke_t tracer_args;
1521 };
1522 
1523 FIXTURE_SETUP(TRACE_poke)
1524 {
1525         struct sock_filter filter[] = {
1526                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1527                         offsetof(struct seccomp_data, nr)),
1528                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1529                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1530                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1531         };
1532 
1533         self->poked = 0;
1534         memset(&self->prog, 0, sizeof(self->prog));
1535         self->prog.filter = malloc(sizeof(filter));
1536         ASSERT_NE(NULL, self->prog.filter);
1537         memcpy(self->prog.filter, filter, sizeof(filter));
1538         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1539 
1540         /* Set up tracer args. */
1541         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1542 
1543         /* Launch tracer. */
1544         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1545                                            &self->tracer_args, false);
1546 }
1547 
1548 FIXTURE_TEARDOWN(TRACE_poke)
1549 {
1550         teardown_trace_fixture(_metadata, self->tracer);
1551         if (self->prog.filter)
1552                 free(self->prog.filter);
1553 }
1554 
1555 TEST_F(TRACE_poke, read_has_side_effects)
1556 {
1557         ssize_t ret;
1558 
1559         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1560         ASSERT_EQ(0, ret);
1561 
1562         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1563         ASSERT_EQ(0, ret);
1564 
1565         EXPECT_EQ(0, self->poked);
1566         ret = read(-1, NULL, 0);
1567         EXPECT_EQ(-1, ret);
1568         EXPECT_EQ(0x1001, self->poked);
1569 }
1570 
1571 TEST_F(TRACE_poke, getpid_runs_normally)
1572 {
1573         long ret;
1574 
1575         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1576         ASSERT_EQ(0, ret);
1577 
1578         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1579         ASSERT_EQ(0, ret);
1580 
1581         EXPECT_EQ(0, self->poked);
1582         EXPECT_NE(0, syscall(__NR_getpid));
1583         EXPECT_EQ(0, self->poked);
1584 }
1585 
/*
 * Per-architecture register access:
 *   ARCH_REGS    type of the register set fetched through ptrace
 *   SYSCALL_NUM  member holding the syscall number
 *   SYSCALL_RET  member holding the syscall return value
 * SYSCALL_NUM_RET_SHARE_REG is defined where both live in one register.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	a7
# define SYSCALL_RET	a0
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS	s390_regs
# define SYSCALL_NUM	gprs[2]
# define SYSCALL_RET	gprs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
/* Second lookup slot, used when SYSCALL_NUM reads __NR_O32_Linux. */
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1628 
/*
 * EXPECT_SYSCALL_RETURN(val, action): evaluate @action and check its
 * result against @val.
 *
 * - A negative @val means "fails with errno -val": @action must return
 *   -1 and errno must equal -val.
 * - Otherwise @action must return @val.
 *
 * When the syscall return can't be changed (the syscall number and the
 * return value share a register), the check is stubbed out to only
 * expect -1.
 *
 * @val is parenthesized in the sign test so that compound expressions
 * (for example a conditional expression) are classified correctly.
 */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif
1644 
/*
 * Prefer PTRACE_GETREGS and PTRACE_SETREGS on the architectures listed
 * here.  This is useful for architectures without HAVE_ARCH_TRACEHOOK
 * (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
# define HAVE_GETREGS
#endif
1651 
1652 /* Architecture-specific syscall fetching routine. */
1653 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1654 {
1655         ARCH_REGS regs;
1656 #ifdef HAVE_GETREGS
1657         EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1658                 TH_LOG("PTRACE_GETREGS failed");
1659                 return -1;
1660         }
1661 #else
1662         struct iovec iov;
1663 
1664         iov.iov_base = &regs;
1665         iov.iov_len = sizeof(regs);
1666         EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1667                 TH_LOG("PTRACE_GETREGSET failed");
1668                 return -1;
1669         }
1670 #endif
1671 
1672 #if defined(__mips__)
1673         if (regs.SYSCALL_NUM == __NR_O32_Linux)
1674                 return regs.SYSCALL_SYSCALL_NUM;
1675 #endif
1676         return regs.SYSCALL_NUM;
1677 }
1678 
1679 /* Architecture-specific syscall changing routine. */
1680 void change_syscall(struct __test_metadata *_metadata,
1681                     pid_t tracee, int syscall, int result)
1682 {
1683         int ret;
1684         ARCH_REGS regs;
1685 #ifdef HAVE_GETREGS
1686         ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1687 #else
1688         struct iovec iov;
1689         iov.iov_base = &regs;
1690         iov.iov_len = sizeof(regs);
1691         ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1692 #endif
1693         EXPECT_EQ(0, ret) {}
1694 
1695 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1696         defined(__s390__) || defined(__hppa__) || defined(__riscv)
1697         {
1698                 regs.SYSCALL_NUM = syscall;
1699         }
1700 #elif defined(__mips__)
1701         {
1702                 if (regs.SYSCALL_NUM == __NR_O32_Linux)
1703                         regs.SYSCALL_SYSCALL_NUM = syscall;
1704                 else
1705                         regs.SYSCALL_NUM = syscall;
1706         }
1707 
1708 #elif defined(__arm__)
1709 # ifndef PTRACE_SET_SYSCALL
1710 #  define PTRACE_SET_SYSCALL   23
1711 # endif
1712         {
1713                 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1714                 EXPECT_EQ(0, ret);
1715         }
1716 
1717 #elif defined(__aarch64__)
1718 # ifndef NT_ARM_SYSTEM_CALL
1719 #  define NT_ARM_SYSTEM_CALL 0x404
1720 # endif
1721         {
1722                 iov.iov_base = &syscall;
1723                 iov.iov_len = sizeof(syscall);
1724                 ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1725                              &iov);
1726                 EXPECT_EQ(0, ret);
1727         }
1728 
1729 #else
1730         ASSERT_EQ(1, 0) {
1731                 TH_LOG("How is the syscall changed on this architecture?");
1732         }
1733 #endif
1734 
1735         /* If syscall is skipped, change return value. */
1736         if (syscall == -1)
1737 #ifdef SYSCALL_NUM_RET_SHARE_REG
1738                 TH_LOG("Can't modify syscall return on this architecture");
1739 #else
1740                 regs.SYSCALL_RET = result;
1741 #endif
1742 
1743 #ifdef HAVE_GETREGS
1744         ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1745 #else
1746         iov.iov_base = &regs;
1747         iov.iov_len = sizeof(regs);
1748         ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1749 #endif
1750         EXPECT_EQ(0, ret);
1751 }
1752 
1753 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1754                     int status, void *args)
1755 {
1756         int ret;
1757         unsigned long msg;
1758 
1759         /* Make sure we got the right message. */
1760         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1761         EXPECT_EQ(0, ret);
1762 
1763         /* Validate and take action on expected syscalls. */
1764         switch (msg) {
1765         case 0x1002:
1766                 /* change getpid to getppid. */
1767                 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1768                 change_syscall(_metadata, tracee, __NR_getppid, 0);
1769                 break;
1770         case 0x1003:
1771                 /* skip gettid with valid return code. */
1772                 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1773                 change_syscall(_metadata, tracee, -1, 45000);
1774                 break;
1775         case 0x1004:
1776                 /* skip openat with error. */
1777                 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1778                 change_syscall(_metadata, tracee, -1, -ESRCH);
1779                 break;
1780         case 0x1005:
1781                 /* do nothing (allow getppid) */
1782                 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1783                 break;
1784         default:
1785                 EXPECT_EQ(0, msg) {
1786                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1787                         kill(tracee, SIGKILL);
1788                 }
1789         }
1790 
1791 }
1792 
1793 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1794                    int status, void *args)
1795 {
1796         int ret, nr;
1797         unsigned long msg;
1798         static bool entry;
1799 
1800         /*
1801          * The traditional way to tell PTRACE_SYSCALL entry/exit
1802          * is by counting.
1803          */
1804         entry = !entry;
1805 
1806         /* Make sure we got an appropriate message. */
1807         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1808         EXPECT_EQ(0, ret);
1809         EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
1810                         : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
1811 
1812         if (!entry)
1813                 return;
1814 
1815         nr = get_syscall(_metadata, tracee);
1816 
1817         if (nr == __NR_getpid)
1818                 change_syscall(_metadata, tracee, __NR_getppid, 0);
1819         if (nr == __NR_gettid)
1820                 change_syscall(_metadata, tracee, -1, 45000);
1821         if (nr == __NR_openat)
1822                 change_syscall(_metadata, tracee, -1, -ESRCH);
1823 }
1824 
1825 FIXTURE(TRACE_syscall) {
1826         struct sock_fprog prog;
1827         pid_t tracer, mytid, mypid, parent;
1828 };
1829 
1830 FIXTURE_SETUP(TRACE_syscall)
1831 {
1832         struct sock_filter filter[] = {
1833                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1834                         offsetof(struct seccomp_data, nr)),
1835                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1836                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1837                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1838                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1839                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1840                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1841                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1842                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1843                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1844         };
1845 
1846         memset(&self->prog, 0, sizeof(self->prog));
1847         self->prog.filter = malloc(sizeof(filter));
1848         ASSERT_NE(NULL, self->prog.filter);
1849         memcpy(self->prog.filter, filter, sizeof(filter));
1850         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1851 
1852         /* Prepare some testable syscall results. */
1853         self->mytid = syscall(__NR_gettid);
1854         ASSERT_GT(self->mytid, 0);
1855         ASSERT_NE(self->mytid, 1) {
1856                 TH_LOG("Running this test as init is not supported. :)");
1857         }
1858 
1859         self->mypid = getpid();
1860         ASSERT_GT(self->mypid, 0);
1861         ASSERT_EQ(self->mytid, self->mypid);
1862 
1863         self->parent = getppid();
1864         ASSERT_GT(self->parent, 0);
1865         ASSERT_NE(self->parent, self->mypid);
1866 
1867         /* Launch tracer. */
1868         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1869                                            false);
1870 }
1871 
1872 FIXTURE_TEARDOWN(TRACE_syscall)
1873 {
1874         teardown_trace_fixture(_metadata, self->tracer);
1875         if (self->prog.filter)
1876                 free(self->prog.filter);
1877 }
1878 
1879 TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1880 {
1881         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1882         teardown_trace_fixture(_metadata, self->tracer);
1883         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1884                                            true);
1885 
1886         /* Tracer will redirect getpid to getppid. */
1887         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1888 }
1889 
1890 TEST_F(TRACE_syscall, ptrace_syscall_errno)
1891 {
1892         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1893         teardown_trace_fixture(_metadata, self->tracer);
1894         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1895                                            true);
1896 
1897         /* Tracer should skip the open syscall, resulting in ESRCH. */
1898         EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1899 }
1900 
1901 TEST_F(TRACE_syscall, ptrace_syscall_faked)
1902 {
1903         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1904         teardown_trace_fixture(_metadata, self->tracer);
1905         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1906                                            true);
1907 
1908         /* Tracer should skip the gettid syscall, resulting fake pid. */
1909         EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1910 }
1911 
1912 TEST_F(TRACE_syscall, syscall_allowed)
1913 {
1914         long ret;
1915 
1916         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1917         ASSERT_EQ(0, ret);
1918 
1919         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1920         ASSERT_EQ(0, ret);
1921 
1922         /* getppid works as expected (no changes). */
1923         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1924         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1925 }
1926 
1927 TEST_F(TRACE_syscall, syscall_redirected)
1928 {
1929         long ret;
1930 
1931         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1932         ASSERT_EQ(0, ret);
1933 
1934         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1935         ASSERT_EQ(0, ret);
1936 
1937         /* getpid has been redirected to getppid as expected. */
1938         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1939         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1940 }
1941 
1942 TEST_F(TRACE_syscall, syscall_errno)
1943 {
1944         long ret;
1945 
1946         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1947         ASSERT_EQ(0, ret);
1948 
1949         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1950         ASSERT_EQ(0, ret);
1951 
1952         /* openat has been skipped and an errno return. */
1953         EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1954 }
1955 
1956 TEST_F(TRACE_syscall, syscall_faked)
1957 {
1958         long ret;
1959 
1960         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1961         ASSERT_EQ(0, ret);
1962 
1963         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1964         ASSERT_EQ(0, ret);
1965 
1966         /* gettid has been skipped and an altered return value stored. */
1967         EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1968 }
1969 
1970 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1971 {
1972         struct sock_filter filter[] = {
1973                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1974                         offsetof(struct seccomp_data, nr)),
1975                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1976                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1977                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1978         };
1979         struct sock_fprog prog = {
1980                 .len = (unsigned short)ARRAY_SIZE(filter),
1981                 .filter = filter,
1982         };
1983         long ret;
1984 
1985         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1986         ASSERT_EQ(0, ret);
1987 
1988         /* Install fixture filter. */
1989         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1990         ASSERT_EQ(0, ret);
1991 
1992         /* Install "errno on getppid" filter. */
1993         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1994         ASSERT_EQ(0, ret);
1995 
1996         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1997         errno = 0;
1998         EXPECT_EQ(-1, syscall(__NR_getpid));
1999         EXPECT_EQ(EPERM, errno);
2000 }
2001 
2002 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
2003 {
2004         struct sock_filter filter[] = {
2005                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2006                         offsetof(struct seccomp_data, nr)),
2007                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2008                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2009                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2010         };
2011         struct sock_fprog prog = {
2012                 .len = (unsigned short)ARRAY_SIZE(filter),
2013                 .filter = filter,
2014         };
2015         long ret;
2016 
2017         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2018         ASSERT_EQ(0, ret);
2019 
2020         /* Install fixture filter. */
2021         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
2022         ASSERT_EQ(0, ret);
2023 
2024         /* Install "death on getppid" filter. */
2025         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2026         ASSERT_EQ(0, ret);
2027 
2028         /* Tracer will redirect getpid to getppid, and we should die. */
2029         EXPECT_NE(self->mypid, syscall(__NR_getpid));
2030 }
2031 
2032 TEST_F(TRACE_syscall, skip_after_ptrace)
2033 {
2034         struct sock_filter filter[] = {
2035                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2036                         offsetof(struct seccomp_data, nr)),
2037                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2038                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2039                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2040         };
2041         struct sock_fprog prog = {
2042                 .len = (unsigned short)ARRAY_SIZE(filter),
2043                 .filter = filter,
2044         };
2045         long ret;
2046 
2047         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2048         teardown_trace_fixture(_metadata, self->tracer);
2049         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2050                                            true);
2051 
2052         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2053         ASSERT_EQ(0, ret);
2054 
2055         /* Install "errno on getppid" filter. */
2056         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2057         ASSERT_EQ(0, ret);
2058 
2059         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
2060         EXPECT_EQ(-1, syscall(__NR_getpid));
2061         EXPECT_EQ(EPERM, errno);
2062 }
2063 
2064 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
2065 {
2066         struct sock_filter filter[] = {
2067                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2068                         offsetof(struct seccomp_data, nr)),
2069                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2070                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2071                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2072         };
2073         struct sock_fprog prog = {
2074                 .len = (unsigned short)ARRAY_SIZE(filter),
2075                 .filter = filter,
2076         };
2077         long ret;
2078 
2079         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2080         teardown_trace_fixture(_metadata, self->tracer);
2081         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2082                                            true);
2083 
2084         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2085         ASSERT_EQ(0, ret);
2086 
2087         /* Install "death on getppid" filter. */
2088         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2089         ASSERT_EQ(0, ret);
2090 
2091         /* Tracer will redirect getpid to getppid, and we should die. */
2092         EXPECT_NE(self->mypid, syscall(__NR_getpid));
2093 }
2094 
2095 TEST(seccomp_syscall)
2096 {
2097         struct sock_filter filter[] = {
2098                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2099         };
2100         struct sock_fprog prog = {
2101                 .len = (unsigned short)ARRAY_SIZE(filter),
2102                 .filter = filter,
2103         };
2104         long ret;
2105 
2106         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2107         ASSERT_EQ(0, ret) {
2108                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2109         }
2110 
2111         /* Reject insane operation. */
2112         ret = seccomp(-1, 0, &prog);
2113         ASSERT_NE(ENOSYS, errno) {
2114                 TH_LOG("Kernel does not support seccomp syscall!");
2115         }
2116         EXPECT_EQ(EINVAL, errno) {
2117                 TH_LOG("Did not reject crazy op value!");
2118         }
2119 
2120         /* Reject strict with flags or pointer. */
2121         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2122         EXPECT_EQ(EINVAL, errno) {
2123                 TH_LOG("Did not reject mode strict with flags!");
2124         }
2125         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2126         EXPECT_EQ(EINVAL, errno) {
2127                 TH_LOG("Did not reject mode strict with uargs!");
2128         }
2129 
2130         /* Reject insane args for filter. */
2131         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2132         EXPECT_EQ(EINVAL, errno) {
2133                 TH_LOG("Did not reject crazy filter flags!");
2134         }
2135         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2136         EXPECT_EQ(EFAULT, errno) {
2137                 TH_LOG("Did not reject NULL filter!");
2138         }
2139 
2140         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2141         EXPECT_EQ(0, errno) {
2142                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2143                         strerror(errno));
2144         }
2145 }
2146 
2147 TEST(seccomp_syscall_mode_lock)
2148 {
2149         struct sock_filter filter[] = {
2150                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2151         };
2152         struct sock_fprog prog = {
2153                 .len = (unsigned short)ARRAY_SIZE(filter),
2154                 .filter = filter,
2155         };
2156         long ret;
2157 
2158         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2159         ASSERT_EQ(0, ret) {
2160                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2161         }
2162 
2163         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2164         ASSERT_NE(ENOSYS, errno) {
2165                 TH_LOG("Kernel does not support seccomp syscall!");
2166         }
2167         EXPECT_EQ(0, ret) {
2168                 TH_LOG("Could not install filter!");
2169         }
2170 
2171         /* Make sure neither entry point will switch to strict. */
2172         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2173         EXPECT_EQ(EINVAL, errno) {
2174                 TH_LOG("Switched to mode strict!");
2175         }
2176 
2177         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2178         EXPECT_EQ(EINVAL, errno) {
2179                 TH_LOG("Switched to mode strict!");
2180         }
2181 }
2182 
2183 /*
2184  * Test detection of known and unknown filter flags. Userspace needs to be able
2185  * to check if a filter flag is supported by the current kernel and a good way
2186  * of doing that is by attempting to enter filter mode, with the flag bit in
2187  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2188  * that the flag is valid and EINVAL indicates that the flag is invalid.
2189  */
2190 TEST(detect_seccomp_filter_flags)
2191 {
2192         unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2193                                  SECCOMP_FILTER_FLAG_LOG,
2194                                  SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2195                                  SECCOMP_FILTER_FLAG_NEW_LISTENER,
2196                                  SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
2197         unsigned int exclusive[] = {
2198                                 SECCOMP_FILTER_FLAG_TSYNC,
2199                                 SECCOMP_FILTER_FLAG_NEW_LISTENER };
2200         unsigned int flag, all_flags, exclusive_mask;
2201         int i;
2202         long ret;
2203 
2204         /* Test detection of individual known-good filter flags */
2205         for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2206                 int bits = 0;
2207 
2208                 flag = flags[i];
2209                 /* Make sure the flag is a single bit! */
2210                 while (flag) {
2211                         if (flag & 0x1)
2212                                 bits ++;
2213                         flag >>= 1;
2214                 }
2215                 ASSERT_EQ(1, bits);
2216                 flag = flags[i];
2217 
2218                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2219                 ASSERT_NE(ENOSYS, errno) {
2220                         TH_LOG("Kernel does not support seccomp syscall!");
2221                 }
2222                 EXPECT_EQ(-1, ret);
2223                 EXPECT_EQ(EFAULT, errno) {
2224                         TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2225                                flag);
2226                 }
2227 
2228                 all_flags |= flag;
2229         }
2230 
2231         /*
2232          * Test detection of all known-good filter flags combined. But
2233          * for the exclusive flags we need to mask them out and try them
2234          * individually for the "all flags" testing.
2235          */
2236         exclusive_mask = 0;
2237         for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2238                 exclusive_mask |= exclusive[i];
2239         for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2240                 flag = all_flags & ~exclusive_mask;
2241                 flag |= exclusive[i];
2242 
2243                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2244                 EXPECT_EQ(-1, ret);
2245                 EXPECT_EQ(EFAULT, errno) {
2246                         TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2247                                flag);
2248                 }
2249         }
2250 
2251         /* Test detection of an unknown filter flags, without exclusives. */
2252         flag = -1;
2253         flag &= ~exclusive_mask;
2254         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2255         EXPECT_EQ(-1, ret);
2256         EXPECT_EQ(EINVAL, errno) {
2257                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2258                        flag);
2259         }
2260 
2261         /*
2262          * Test detection of an unknown filter flag that may simply need to be
2263          * added to this test
2264          */
2265         flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2266         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2267         EXPECT_EQ(-1, ret);
2268         EXPECT_EQ(EINVAL, errno) {
2269                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2270                        flag);
2271         }
2272 }
2273 
2274 TEST(TSYNC_first)
2275 {
2276         struct sock_filter filter[] = {
2277                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2278         };
2279         struct sock_fprog prog = {
2280                 .len = (unsigned short)ARRAY_SIZE(filter),
2281                 .filter = filter,
2282         };
2283         long ret;
2284 
2285         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2286         ASSERT_EQ(0, ret) {
2287                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2288         }
2289 
2290         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2291                       &prog);
2292         ASSERT_NE(ENOSYS, errno) {
2293                 TH_LOG("Kernel does not support seccomp syscall!");
2294         }
2295         EXPECT_EQ(0, ret) {
2296                 TH_LOG("Could not install initial filter with TSYNC!");
2297         }
2298 }
2299 
/* Number of sibling threads used by the TSYNC tests. */
#define TSYNC_SIBLINGS 2

/* Per-thread state handed to each sibling thread of the TSYNC tests. */
struct tsync_sibling {
	pthread_t tid;			/* pthread handle; 0 once joined */
	pid_t system_tid;		/* gettid result stored by the thread */
	sem_t *started;			/* posted by the thread once it is running */
	pthread_cond_t *cond;		/* condition the thread waits on */
	pthread_mutex_t *mutex;		/* mutex paired with @cond */
	int diverge;			/* non-zero: thread installs @prog itself */
	int num_waits;			/* number of cond waits before continuing */
	struct sock_fprog *prog;	/* filter applied when @diverge is set */
	struct __test_metadata *metadata;
};
2312 
/*
 * To avoid joining joined threads (which is not allowed by Bionic),
 * make sure we both successfully join and clear the tid to skip a
 * later join attempt during fixture teardown. Any remaining threads
 * will be directly killed during teardown.
 *
 * @tid must be an lvalue; it is set to 0 after a successful join.
 * Arguments are parenthesized so arbitrary expressions expand safely,
 * and the message carries no trailing newline because TH_LOG already
 * terminates the line.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _rc = pthread_join((tid), (status));		\
		if (_rc) {						\
			TH_LOG("pthread_join of tid %u failed: %d",	\
				(unsigned int)(tid), _rc);		\
		} else {						\
			(tid) = 0;					\
		}							\
	} while (0)
2329 
2330 FIXTURE(TSYNC) {
2331         struct sock_fprog root_prog, apply_prog;
2332         struct tsync_sibling sibling[TSYNC_SIBLINGS];
2333         sem_t started;
2334         pthread_cond_t cond;
2335         pthread_mutex_t mutex;
2336         int sibling_count;
2337 };
2338 
2339 FIXTURE_SETUP(TSYNC)
2340 {
2341         struct sock_filter root_filter[] = {
2342                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2343         };
2344         struct sock_filter apply_filter[] = {
2345                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2346                         offsetof(struct seccomp_data, nr)),
2347                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2348                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2349                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2350         };
2351 
2352         memset(&self->root_prog, 0, sizeof(self->root_prog));
2353         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2354         memset(&self->sibling, 0, sizeof(self->sibling));
2355         self->root_prog.filter = malloc(sizeof(root_filter));
2356         ASSERT_NE(NULL, self->root_prog.filter);
2357         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2358         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2359 
2360         self->apply_prog.filter = malloc(sizeof(apply_filter));
2361         ASSERT_NE(NULL, self->apply_prog.filter);
2362         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2363         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2364 
2365         self->sibling_count = 0;
2366         pthread_mutex_init(&self->mutex, NULL);
2367         pthread_cond_init(&self->cond, NULL);
2368         sem_init(&self->started, 0, 0);
2369         self->sibling[0].tid = 0;
2370         self->sibling[0].cond = &self->cond;
2371         self->sibling[0].started = &self->started;
2372         self->sibling[0].mutex = &self->mutex;
2373         self->sibling[0].diverge = 0;
2374         self->sibling[0].num_waits = 1;
2375         self->sibling[0].prog = &self->root_prog;
2376         self->sibling[0].metadata = _metadata;
2377         self->sibling[1].tid = 0;
2378         self->sibling[1].cond = &self->cond;
2379         self->sibling[1].started = &self->started;
2380         self->sibling[1].mutex = &self->mutex;
2381         self->sibling[1].diverge = 0;
2382         self->sibling[1].prog = &self->root_prog;
2383         self->sibling[1].num_waits = 1;
2384         self->sibling[1].metadata = _metadata;
2385 }
2386 
2387 FIXTURE_TEARDOWN(TSYNC)
2388 {
2389         int sib = 0;
2390 
2391         if (self->root_prog.filter)
2392                 free(self->root_prog.filter);
2393         if (self->apply_prog.filter)
2394                 free(self->apply_prog.filter);
2395 
2396         for ( ; sib < self->sibling_count; ++sib) {
2397                 struct tsync_sibling *s = &self->sibling[sib];
2398 
2399                 if (!s->tid)
2400                         continue;
2401                 /*
2402                  * If a thread is still running, it may be stuck, so hit
2403                  * it over the head really hard.
2404                  */
2405                 pthread_kill(s->tid, 9);
2406         }
2407         pthread_mutex_destroy(&self->mutex);
2408         pthread_cond_destroy(&self->cond);
2409         sem_destroy(&self->started);
2410 }
2411 
2412 void *tsync_sibling(void *data)
2413 {
2414         long ret = 0;
2415         struct tsync_sibling *me = data;
2416 
2417         me->system_tid = syscall(__NR_gettid);
2418 
2419         pthread_mutex_lock(me->mutex);
2420         if (me->diverge) {
2421                 /* Just re-apply the root prog to fork the tree */
2422                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2423                                 me->prog, 0, 0);
2424         }
2425         sem_post(me->started);
2426         /* Return outside of started so parent notices failures. */
2427         if (ret) {
2428                 pthread_mutex_unlock(me->mutex);
2429                 return (void *)SIBLING_EXIT_FAILURE;
2430         }
2431         do {
2432                 pthread_cond_wait(me->cond, me->mutex);
2433                 me->num_waits = me->num_waits - 1;
2434         } while (me->num_waits);
2435         pthread_mutex_unlock(me->mutex);
2436 
2437         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2438         if (!ret)
2439                 return (void *)SIBLING_EXIT_NEWPRIVS;
2440         read(0, NULL, 0);
2441         return (void *)SIBLING_EXIT_UNKILLED;
2442 }
2443 
2444 void tsync_start_sibling(struct tsync_sibling *sibling)
2445 {
2446         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2447 }
2448 
2449 TEST_F(TSYNC, siblings_fail_prctl)
2450 {
2451         long ret;
2452         void *status;
2453         struct sock_filter filter[] = {
2454                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2455                         offsetof(struct seccomp_data, nr)),
2456                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2457                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2458                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2459         };
2460         struct sock_fprog prog = {
2461                 .len = (unsigned short)ARRAY_SIZE(filter),
2462                 .filter = filter,
2463         };
2464 
2465         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2466                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2467         }
2468 
2469         /* Check prctl failure detection by requesting sib 0 diverge. */
2470         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2471         ASSERT_NE(ENOSYS, errno) {
2472                 TH_LOG("Kernel does not support seccomp syscall!");
2473         }
2474         ASSERT_EQ(0, ret) {
2475                 TH_LOG("setting filter failed");
2476         }
2477 
2478         self->sibling[0].diverge = 1;
2479         tsync_start_sibling(&self->sibling[0]);
2480         tsync_start_sibling(&self->sibling[1]);
2481 
2482         while (self->sibling_count < TSYNC_SIBLINGS) {
2483                 sem_wait(&self->started);
2484                 self->sibling_count++;
2485         }
2486 
2487         /* Signal the threads to clean up*/
2488         pthread_mutex_lock(&self->mutex);
2489         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2490                 TH_LOG("cond broadcast non-zero");
2491         }
2492         pthread_mutex_unlock(&self->mutex);
2493 
2494         /* Ensure diverging sibling failed to call prctl. */
2495         PTHREAD_JOIN(self->sibling[0].tid, &status);
2496         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2497         PTHREAD_JOIN(self->sibling[1].tid, &status);
2498         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2499 }
2500 
2501 TEST_F(TSYNC, two_siblings_with_ancestor)
2502 {
2503         long ret;
2504         void *status;
2505 
2506         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2507                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2508         }
2509 
2510         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2511         ASSERT_NE(ENOSYS, errno) {
2512                 TH_LOG("Kernel does not support seccomp syscall!");
2513         }
2514         ASSERT_EQ(0, ret) {
2515                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2516         }
2517         tsync_start_sibling(&self->sibling[0]);
2518         tsync_start_sibling(&self->sibling[1]);
2519 
2520         while (self->sibling_count < TSYNC_SIBLINGS) {
2521                 sem_wait(&self->started);
2522                 self->sibling_count++;
2523         }
2524 
2525         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2526                       &self->apply_prog);
2527         ASSERT_EQ(0, ret) {
2528                 TH_LOG("Could install filter on all threads!");
2529         }
2530         /* Tell the siblings to test the policy */
2531         pthread_mutex_lock(&self->mutex);
2532         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2533                 TH_LOG("cond broadcast non-zero");
2534         }
2535         pthread_mutex_unlock(&self->mutex);
2536         /* Ensure they are both killed and don't exit cleanly. */
2537         PTHREAD_JOIN(self->sibling[0].tid, &status);
2538         EXPECT_EQ(0x0, (long)status);
2539         PTHREAD_JOIN(self->sibling[1].tid, &status);
2540         EXPECT_EQ(0x0, (long)status);
2541 }
2542 
2543 TEST_F(TSYNC, two_sibling_want_nnp)
2544 {
2545         void *status;
2546 
2547         /* start siblings before any prctl() operations */
2548         tsync_start_sibling(&self->sibling[0]);
2549         tsync_start_sibling(&self->sibling[1]);
2550         while (self->sibling_count < TSYNC_SIBLINGS) {
2551                 sem_wait(&self->started);
2552                 self->sibling_count++;
2553         }
2554 
2555         /* Tell the siblings to test no policy */
2556         pthread_mutex_lock(&self->mutex);
2557         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2558                 TH_LOG("cond broadcast non-zero");
2559         }
2560         pthread_mutex_unlock(&self->mutex);
2561 
2562         /* Ensure they are both upset about lacking nnp. */
2563         PTHREAD_JOIN(self->sibling[0].tid, &status);
2564         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2565         PTHREAD_JOIN(self->sibling[1].tid, &status);
2566         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2567 }
2568 
2569 TEST_F(TSYNC, two_siblings_with_no_filter)
2570 {
2571         long ret;
2572         void *status;
2573 
2574         /* start siblings before any prctl() operations */
2575         tsync_start_sibling(&self->sibling[0]);
2576         tsync_start_sibling(&self->sibling[1]);
2577         while (self->sibling_count < TSYNC_SIBLINGS) {
2578                 sem_wait(&self->started);
2579                 self->sibling_count++;
2580         }
2581 
2582         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2583                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2584         }
2585 
2586         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2587                       &self->apply_prog);
2588         ASSERT_NE(ENOSYS, errno) {
2589                 TH_LOG("Kernel does not support seccomp syscall!");
2590         }
2591         ASSERT_EQ(0, ret) {
2592                 TH_LOG("Could install filter on all threads!");
2593         }
2594 
2595         /* Tell the siblings to test the policy */
2596         pthread_mutex_lock(&self->mutex);
2597         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2598                 TH_LOG("cond broadcast non-zero");
2599         }
2600         pthread_mutex_unlock(&self->mutex);
2601 
2602         /* Ensure they are both killed and don't exit cleanly. */
2603         PTHREAD_JOIN(self->sibling[0].tid, &status);
2604         EXPECT_EQ(0x0, (long)status);
2605         PTHREAD_JOIN(self->sibling[1].tid, &status);
2606         EXPECT_EQ(0x0, (long)status);
2607 }
2608 
/*
 * TSYNC against a diverged sibling, default error reporting.
 *
 * The leader sets no_new_privs and installs root_prog, then starts two
 * siblings with sibling 0 flagged via .diverge. A subsequent TSYNC install
 * of apply_prog is expected to return sibling 0's system_tid rather than 0.
 * Since the sync did not happen, both siblings must exit with
 * SIBLING_EXIT_UNKILLED.
 */
TEST_F(TSYNC, two_siblings_with_one_divergence)
{
        long ret;
        void *status;

        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
        }

        /* Install the common root filter before any sibling exists. */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
        ASSERT_NE(ENOSYS, errno) {
                TH_LOG("Kernel does not support seccomp syscall!");
        }
        ASSERT_EQ(0, ret) {
                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
        }
        /* Mark sibling 0 as the one that diverges; must be set before start. */
        self->sibling[0].diverge = 1;
        tsync_start_sibling(&self->sibling[0]);
        tsync_start_sibling(&self->sibling[1]);

        /* Wait until every sibling has posted 'started'. */
        while (self->sibling_count < TSYNC_SIBLINGS) {
                sem_wait(&self->started);
                self->sibling_count++;
        }

        /* Without TSYNC_ESRCH the failing thread's id is the return value. */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
                      &self->apply_prog);
        ASSERT_EQ(self->sibling[0].system_tid, ret) {
                TH_LOG("Did not fail on diverged sibling.");
        }

        /* Wake the threads */
        pthread_mutex_lock(&self->mutex);
        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
                TH_LOG("cond broadcast non-zero");
        }
        pthread_mutex_unlock(&self->mutex);

        /* Ensure they are both unkilled. */
        PTHREAD_JOIN(self->sibling[0].tid, &status);
        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
        PTHREAD_JOIN(self->sibling[1].tid, &status);
        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}
2653 
2654 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
2655 {
2656         long ret, flags;
2657         void *status;
2658 
2659         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2660                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2661         }
2662 
2663         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2664         ASSERT_NE(ENOSYS, errno) {
2665                 TH_LOG("Kernel does not support seccomp syscall!");
2666         }
2667         ASSERT_EQ(0, ret) {
2668                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2669         }
2670         self->sibling[0].diverge = 1;
2671         tsync_start_sibling(&self->sibling[0]);
2672         tsync_start_sibling(&self->sibling[1]);
2673 
2674         while (self->sibling_count < TSYNC_SIBLINGS) {
2675                 sem_wait(&self->started);
2676                 self->sibling_count++;
2677         }
2678 
2679         flags = SECCOMP_FILTER_FLAG_TSYNC | \
2680                 SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
2681         ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
2682         ASSERT_EQ(ESRCH, errno) {
2683                 TH_LOG("Did not return ESRCH for diverged sibling.");
2684         }
2685         ASSERT_EQ(-1, ret) {
2686                 TH_LOG("Did not fail on diverged sibling.");
2687         }
2688 
2689         /* Wake the threads */
2690         pthread_mutex_lock(&self->mutex);
2691         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2692                 TH_LOG("cond broadcast non-zero");
2693         }
2694         pthread_mutex_unlock(&self->mutex);
2695 
2696         /* Ensure they are both unkilled. */
2697         PTHREAD_JOIN(self->sibling[0].tid, &status);
2698         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2699         PTHREAD_JOIN(self->sibling[1].tid, &status);
2700         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2701 }
2702 
/*
 * TSYNC when the siblings were started before the leader had any filter.
 *
 * Sequence exercised below:
 *   1. start both siblings (sibling 0 flagged to diverge), then install
 *      root_prog on the leader only;
 *   2. a TSYNC install must fail and name sibling 0;
 *   3. let that sibling finish (expected SIBLING_EXIT_UNKILLED) and wait
 *      for its task to really disappear;
 *   4. TSYNC must now succeed, and the remaining sibling is expected to
 *      come back with a zero join status;
 *   5. with no siblings left, TSYNC must succeed once more.
 */
TEST_F(TSYNC, two_siblings_not_under_filter)
{
        long ret, sib;
        void *status;
        /* 100 ms pause between liveness polls. */
        struct timespec delay = { .tv_nsec = 100000000 };

        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
        }

        /*
         * Sibling 0 will have its own seccomp policy
         * and Sibling 1 will not be under seccomp at
         * all. Sibling 1 will enter seccomp and 0
         * will cause failure.
         */
        self->sibling[0].diverge = 1;
        tsync_start_sibling(&self->sibling[0]);
        tsync_start_sibling(&self->sibling[1]);

        /* Wait until every sibling has posted 'started'. */
        while (self->sibling_count < TSYNC_SIBLINGS) {
                sem_wait(&self->started);
                self->sibling_count++;
        }

        /* Leader-only filter: installed after the siblings already exist. */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
        ASSERT_NE(ENOSYS, errno) {
                TH_LOG("Kernel does not support seccomp syscall!");
        }
        ASSERT_EQ(0, ret) {
                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
        }

        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
                      &self->apply_prog);
        ASSERT_EQ(ret, self->sibling[0].system_tid) {
                TH_LOG("Did not fail on diverged sibling.");
        }
        /*
         * Pick the sibling that blocked the sync. Given the assertion just
         * above, the condition below always holds and sib ends up 0.
         */
        sib = 1;
        if (ret == self->sibling[0].system_tid)
                sib = 0;

        pthread_mutex_lock(&self->mutex);

        /* Increment the other siblings num_waits so we can clean up
         * the one we just saw.
         */
        self->sibling[!sib].num_waits += 1;

        /* Signal the thread to clean up. */
        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
                TH_LOG("cond broadcast non-zero");
        }
        pthread_mutex_unlock(&self->mutex);
        PTHREAD_JOIN(self->sibling[sib].tid, &status);
        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
        /* Poll for actual task death. pthread_join doesn't guarantee it. */
        while (!kill(self->sibling[sib].system_tid, 0))
                nanosleep(&delay, NULL);
        /* Switch to the remaining sibling */
        sib = !sib;

        /* The diverged thread is gone, so the sync is expected to work now. */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
                      &self->apply_prog);
        ASSERT_EQ(0, ret) {
                TH_LOG("Expected the remaining sibling to sync");
        };

        pthread_mutex_lock(&self->mutex);

        /* If remaining sibling didn't have a chance to wake up during
         * the first broadcast, manually reduce the num_waits now.
         */
        if (self->sibling[sib].num_waits > 1)
                self->sibling[sib].num_waits = 1;
        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
                TH_LOG("cond broadcast non-zero");
        }
        pthread_mutex_unlock(&self->mutex);
        PTHREAD_JOIN(self->sibling[sib].tid, &status);
        /* Zero status: this sibling did not leave via a SIBLING_EXIT_* path. */
        EXPECT_EQ(0, (long)status);
        /* Poll for actual task death. pthread_join doesn't guarantee it. */
        while (!kill(self->sibling[sib].system_tid, 0))
                nanosleep(&delay, NULL);

        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
                      &self->apply_prog);
        ASSERT_EQ(0, ret);  /* just us chickens */
}
2792 
/*
 * Make sure restarted syscalls are seen directly as "restart_syscall".
 *
 * A forked child makes itself traceable, installs a filter that returns
 * SECCOMP_RET_TRACE|0x100 for nanosleep/clock_nanosleep and
 * SECCOMP_RET_TRACE|0x200 for restart_syscall, and then sleeps. The parent
 * acts as the tracer: it observes the 0x100 event, interrupts the sleep
 * with SIGSTOP/SIGCONT and then checks that the resumed sleep shows up as
 * a 0x200 event. Parent and child are kept in lock-step through a pipe
 * ('.' releases the child into nanosleep, '!' lets it finish).
 */
TEST(syscall_restart)
{
        long ret;
        unsigned long msg;
        pid_t child_pid;
        int pipefd[2];
        int status;
        siginfo_t info = { };
        struct sock_filter filter[] = {
                /* Load the syscall number. */
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
                         offsetof(struct seccomp_data, nr)),

                /* The next four entries all land on the ALLOW return. */
#ifdef __NR_sigreturn
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
#endif
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
                /* Both sleep variants land on the TRACE|0x100 return. */
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
                /* restart_syscall lands on the TRACE|0x200 return. */
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),

                /* Allow __NR_write for easy logging. */
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
                /* Anything not matched above is killed. */
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
                /* The nanosleep jump target. */
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
                /* The restart_syscall jump target. */
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
        };
        struct sock_fprog prog = {
                .len = (unsigned short)ARRAY_SIZE(filter),
                .filter = filter,
        };
#if defined(__arm__)
        struct utsname utsbuf;
#endif

        ASSERT_EQ(0, pipe(pipefd));

        child_pid = fork();
        ASSERT_LE(0, child_pid);
        if (child_pid == 0) {
                /* Child uses EXPECT not ASSERT to deliver status correctly. */
                char buf = ' ';
                struct timespec timeout = { };

                /* Attach parent as tracer and stop. */
                EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
                EXPECT_EQ(0, raise(SIGSTOP));

                /* The child only reads from the pipe. */
                EXPECT_EQ(0, close(pipefd[1]));

                EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
                        TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
                }

                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
                EXPECT_EQ(0, ret) {
                        TH_LOG("Failed to install filter!");
                }

                /* Block until the parent has set its ptrace options. */
                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
                        TH_LOG("Failed to read() sync from parent");
                }
                EXPECT_EQ('.', buf) {
                        TH_LOG("Failed to get sync data from read()");
                }

                /* Start nanosleep to be interrupted. */
                timeout.tv_sec = 1;
                errno = 0;
                EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
                        TH_LOG("Call to nanosleep() failed (errno %d)", errno);
                }

                /* Read final sync from parent. */
                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
                        TH_LOG("Failed final read() from parent");
                }
                EXPECT_EQ('!', buf) {
                        TH_LOG("Failed to get final data from read()");
                }

                /* Directly report the status of our test harness results. */
                syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
                                                     : EXIT_FAILURE);
        }
        /* The parent only writes to the pipe. */
        EXPECT_EQ(0, close(pipefd[0]));

        /* Attach to child, setup options, and release. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
                            PTRACE_O_TRACESECCOMP));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(1, write(pipefd[1], ".", 1));

        /* Wait for nanosleep() to start. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
        /* 0x100 is the data value attached to the sleep rules above. */
        ASSERT_EQ(0x100, msg);
        ret = get_syscall(_metadata, child_pid);
        EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);

        /* Might as well check siginfo for sanity while we're here. */
        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
        ASSERT_EQ(SIGTRAP, info.si_signo);
        ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
        EXPECT_EQ(0, info.si_errno);
        EXPECT_EQ(getuid(), info.si_uid);
        /* Verify signal delivery came from child (seccomp-triggered). */
        EXPECT_EQ(child_pid, info.si_pid);

        /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
        ASSERT_EQ(0, kill(child_pid, SIGSTOP));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
        /*
         * There is no siginfo on SIGSTOP any more, so we can't verify
         * signal delivery came from parent now (getpid() == info.si_pid).
         * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
         * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
         */
        EXPECT_EQ(SIGSTOP, info.si_signo);

        /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
        ASSERT_EQ(0, kill(child_pid, SIGCONT));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGCONT, WSTOPSIG(status));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));

        /* Wait for restart_syscall() to start. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));

        /* 0x200 is the data value attached to the restart_syscall rule. */
        ASSERT_EQ(0x200, msg);
        ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
        /*
         * FIXME:
         * - native ARM registers do NOT expose true syscall.
         * - compat ARM registers on ARM64 DO expose true syscall.
         */
        ASSERT_EQ(0, uname(&utsbuf));
        if (strncmp(utsbuf.machine, "arm", 3) == 0) {
                EXPECT_EQ(__NR_nanosleep, ret);
        } else
#endif
        {
                EXPECT_EQ(__NR_restart_syscall, ret);
        }

        /* Write again to end test. */
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(1, write(pipefd[1], "!", 1));
        EXPECT_EQ(0, close(pipefd[1]));

        /* Fold the child's own verdict (exit status / signal) into ours. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        if (WIFSIGNALED(status) || WEXITSTATUS(status))
                _metadata->passed = 0;
}
2968 
/*
 * Exercise SECCOMP_FILTER_FLAG_LOG.
 *
 * The flag must be rejected with EINVAL in strict mode, accepted for
 * filter mode, and must not stop a SECCOMP_RET_KILL filter from working.
 * The test is declared with SIGSYS because the final getpid() call is
 * matched by the kill filter and is not expected to return.
 */
TEST_SIGNAL(filter_flag_log, SIGSYS)
{
        /* Allows everything. */
        struct sock_filter allow_filter[] = {
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
        };
        /* Kills on getpid, allows everything else. */
        struct sock_filter kill_filter[] = {
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
                        offsetof(struct seccomp_data, nr)),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog allow_prog = {
                .len = (unsigned short)ARRAY_SIZE(allow_filter),
                .filter = allow_filter,
        };
        struct sock_fprog kill_prog = {
                .len = (unsigned short)ARRAY_SIZE(kill_filter),
                .filter = kill_filter,
        };
        long ret;
        /* Captured before any filter exists, for comparison further down. */
        pid_t parent = getppid();

        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        ASSERT_EQ(0, ret);

        /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
        ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
                      &allow_prog);
        ASSERT_NE(ENOSYS, errno) {
                TH_LOG("Kernel does not support seccomp syscall!");
        }
        EXPECT_NE(0, ret) {
                TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
        }
        EXPECT_EQ(EINVAL, errno) {
                TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
        }

        /* Verify that a simple, permissive filter can be added with no flags */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
        EXPECT_EQ(0, ret);

        /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
                      &allow_prog);
        ASSERT_NE(EINVAL, errno) {
                TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
        }
        EXPECT_EQ(0, ret);

        /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
                      &kill_prog);
        EXPECT_EQ(0, ret);

        /* getppid is not matched by the kill filter, so it still works. */
        EXPECT_EQ(parent, syscall(__NR_getppid));
        /* getpid() should never return. */
        EXPECT_EQ(0, syscall(__NR_getpid));
}
3029 
3030 TEST(get_action_avail)
3031 {
3032         __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
3033                             SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
3034                             SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
3035         __u32 unknown_action = 0x10000000U;
3036         int i;
3037         long ret;
3038 
3039         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
3040         ASSERT_NE(ENOSYS, errno) {
3041                 TH_LOG("Kernel does not support seccomp syscall!");
3042         }
3043         ASSERT_NE(EINVAL, errno) {
3044                 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
3045         }
3046         EXPECT_EQ(ret, 0);
3047 
3048         for (i = 0; i < ARRAY_SIZE(actions); i++) {
3049                 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
3050                 EXPECT_EQ(ret, 0) {
3051                         TH_LOG("Expected action (0x%X) not available!",
3052                                actions[i]);
3053                 }
3054         }
3055 
3056         /* Check that an unknown action is handled properly (EOPNOTSUPP) */
3057         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
3058         EXPECT_EQ(ret, -1);
3059         EXPECT_EQ(errno, EOPNOTSUPP);
3060 }
3061 
/*
 * Read back per-filter metadata with PTRACE_SECCOMP_GET_METADATA.
 *
 * A child installs two allow-all filters, the first with
 * SECCOMP_FILTER_FLAG_LOG and the second without, signals readiness over a
 * pipe and then sleeps forever. The parent attaches with ptrace and checks
 * that filter_off 0 reports the LOG flag and filter_off 1 reports no
 * flags. The child is always killed at the end.
 */
TEST(get_metadata)
{
        pid_t pid;
        int pipefd[2];
        char buf;
        struct seccomp_metadata md;
        long ret;

        /* Only real root can get metadata. */
        if (geteuid()) {
                XFAIL(return, "get_metadata requires real root");
                /* NOTE(review): XFAIL is handed 'return' already; this looks redundant - confirm. */
                return;
        }

        ASSERT_EQ(0, pipe(pipefd));

        pid = fork();
        ASSERT_GE(pid, 0);
        if (pid == 0) {
                struct sock_filter filter[] = {
                        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
                };
                struct sock_fprog prog = {
                        .len = (unsigned short)ARRAY_SIZE(filter),
                        .filter = filter,
                };

                /* one with log, one without */
                EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
                                     SECCOMP_FILTER_FLAG_LOG, &prog));
                EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));

                /* Tell the parent that both filters are in place. */
                EXPECT_EQ(0, close(pipefd[0]));
                ASSERT_EQ(1, write(pipefd[1], "1", 1));
                ASSERT_EQ(0, close(pipefd[1]));

                /* Stay alive until the parent kills us. */
                while (1)
                        sleep(100);
        }

        /* Wait for the child's readiness byte before attaching. */
        ASSERT_EQ(0, close(pipefd[1]));
        ASSERT_EQ(1, read(pipefd[0], &buf, 1));

        ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
        ASSERT_EQ(pid, waitpid(pid, NULL, 0));

        /* Past here must not use ASSERT or child process is never killed. */

        /* filter_off selects which installed filter to describe. */
        md.filter_off = 0;
        errno = 0;
        ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
        EXPECT_EQ(sizeof(md), ret) {
                if (errno == EINVAL)
                        XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
        }

        EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
        EXPECT_EQ(md.filter_off, 0);

        md.filter_off = 1;
        ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
        EXPECT_EQ(sizeof(md), ret);
        EXPECT_EQ(md.flags, 0);
        EXPECT_EQ(md.filter_off, 1);

skip:
        /* Common exit: the sleeping child must not outlive the test. */
        ASSERT_EQ(0, kill(pid, SIGKILL));
}
3130 
3131 static int user_trap_syscall(int nr, unsigned int flags)
3132 {
3133         struct sock_filter filter[] = {
3134                 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
3135                         offsetof(struct seccomp_data, nr)),
3136                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
3137                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
3138                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
3139         };
3140 
3141         struct sock_fprog prog = {
3142                 .len = (unsigned short)ARRAY_SIZE(filter),
3143                 .filter = filter,
3144         };
3145 
3146         return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
3147 }
3148 
/* Value the supervisor hands back as the fake syscall result. */
#define USER_NOTIF_MAGIC INT_MAX
/*
 * End-to-end check of the user-notification path:
 *   - with no listener attached, the trapped syscall fails with ENOSYS;
 *   - a listener can be created on top of several no-op filters, and a
 *     second listener in the same chain is refused with EBUSY;
 *   - a child's trapped getppid() shows up on the listener (POLLIN), a
 *     request with garbage in it is rejected with EINVAL, a clean request
 *     is received, and the listener then polls as POLLOUT;
 *   - a response with non-zero flags is rejected with EINVAL, and a valid
 *     response makes the child see USER_NOTIF_MAGIC as the return value.
 */
TEST(user_notification_basic)
{
        pid_t pid;
        long ret;
        int status, listener;
        struct seccomp_notif req = {};
        struct seccomp_notif_resp resp = {};
        struct pollfd pollfd;

        struct sock_filter filter[] = {
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog prog = {
                .len = (unsigned short)ARRAY_SIZE(filter),
                .filter = filter,
        };

        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        ASSERT_EQ(0, ret) {
                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
        }

        pid = fork();
        ASSERT_GE(pid, 0);

        /* Check that we get -ENOSYS with no listener attached */
        if (pid == 0) {
                if (user_trap_syscall(__NR_getppid, 0) < 0)
                        exit(1);
                ret = syscall(__NR_getppid);
                /* Exit status 0 only if the call failed with ENOSYS. */
                exit(ret >= 0 || errno != ENOSYS);
        }

        EXPECT_EQ(waitpid(pid, &status, 0), pid);
        EXPECT_EQ(true, WIFEXITED(status));
        EXPECT_EQ(0, WEXITSTATUS(status));

        /* Add some no-op filters for grins. */
        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);

        /* Check that the basic notification machinery works */
        listener = user_trap_syscall(__NR_getppid,
                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
        ASSERT_GE(listener, 0);

        /* Installing a second listener in the chain should EBUSY */
        EXPECT_EQ(user_trap_syscall(__NR_getppid,
                                    SECCOMP_FILTER_FLAG_NEW_LISTENER),
                  -1);
        EXPECT_EQ(errno, EBUSY);

        pid = fork();
        ASSERT_GE(pid, 0);

        if (pid == 0) {
                /* Exit status 0 only if the supervisor's value came back. */
                ret = syscall(__NR_getppid);
                exit(ret != USER_NOTIF_MAGIC);
        }

        pollfd.fd = listener;
        pollfd.events = POLLIN | POLLOUT;

        /* A pending, not-yet-received notification reads as POLLIN only. */
        EXPECT_GT(poll(&pollfd, 1, -1), 0);
        EXPECT_EQ(pollfd.revents, POLLIN);

        /* Test that we can't pass garbage to the kernel. */
        memset(&req, 0, sizeof(req));
        req.pid = -1;
        errno = 0;
        ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
        EXPECT_EQ(-1, ret);
        EXPECT_EQ(EINVAL, errno);

        /* If the garbage was rejected, receive for real with a clean req. */
        if (ret) {
                req.pid = 0;
                EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
        }

        pollfd.fd = listener;
        pollfd.events = POLLIN | POLLOUT;

        /* Once received, the notification awaits a reply: POLLOUT only. */
        EXPECT_GT(poll(&pollfd, 1, -1), 0);
        EXPECT_EQ(pollfd.revents, POLLOUT);

        EXPECT_EQ(req.data.nr,  __NR_getppid);

        resp.id = req.id;
        resp.error = 0;
        resp.val = USER_NOTIF_MAGIC;

        /* check that we make sure flags == 0 */
        resp.flags = 1;
        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
        EXPECT_EQ(errno, EINVAL);

        resp.flags = 0;
        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

        EXPECT_EQ(waitpid(pid, &status, 0), pid);
        EXPECT_EQ(true, WIFEXITED(status));
        EXPECT_EQ(0, WEXITSTATUS(status));
}
3255 
3256 TEST(user_notification_with_tsync)
3257 {
3258         int ret;
3259         unsigned int flags;
3260 
3261         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3262         ASSERT_EQ(0, ret) {
3263                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3264         }
3265 
3266         /* these were exclusive */
3267         flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
3268                 SECCOMP_FILTER_FLAG_TSYNC;
3269         ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
3270         ASSERT_EQ(EINVAL, errno);
3271 
3272         /* but now they're not */
3273         flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
3274         ret = user_trap_syscall(__NR_getppid, flags);
3275         close(ret);
3276         ASSERT_LE(0, ret);
3277 }
3278 
/*
 * Kill a task while its trapped syscall is waiting on the supervisor.
 *
 * After the notification has been received its id must be valid; once the
 * child has been killed and reaped, the same id must be reported invalid
 * and a reply to it must fail with ENOENT.
 */
TEST(user_notification_kill_in_middle)
{
        pid_t pid;
        long ret;
        int listener;
        struct seccomp_notif req = {};
        struct seccomp_notif_resp resp = {};

        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        ASSERT_EQ(0, ret) {
                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
        }

        listener = user_trap_syscall(__NR_getppid,
                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
        ASSERT_GE(listener, 0);

        /*
         * Check that nothing bad happens when we kill the task in the middle
         * of a syscall.
         */
        pid = fork();
        ASSERT_GE(pid, 0);

        if (pid == 0) {
                /* Trapped call; this test never answers it. */
                ret = syscall(__NR_getppid);
                exit(ret != USER_NOTIF_MAGIC);
        }

        /* Pick up the notification and confirm its id is live. */
        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);

        EXPECT_EQ(kill(pid, SIGKILL), 0);
        EXPECT_EQ(waitpid(pid, NULL, 0), pid);

        /* With the task gone, the id must no longer validate. */
        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);

        /* Replying to the dead notification must fail with ENOENT. */
        resp.id = req.id;
        ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
        EXPECT_EQ(ret, -1);
        EXPECT_EQ(errno, ENOENT);
}
3321 
/* Descriptor the signal handler reports to; -1 until a test sets it. */
static int handled = -1;

/*
 * Signal handler: write a single 'c' byte to the 'handled' descriptor so
 * that whoever holds the other end can tell the signal was delivered.
 *
 * Only async-signal-safe calls are used here (write(2), no stdio), and
 * errno is saved and restored so that the interrupted code never sees a
 * value set by this handler.
 *
 * @signal: signal number being delivered (unused).
 */
static void signal_handler(int signal)
{
        static const char msg[] = "write from signal: failed\n";
        int saved_errno = errno;

        (void)signal;

        if (write(handled, "c", 1) != 1) {
                /* Best-effort diagnostic; nothing more can be done if it fails. */
                ssize_t ignored = write(STDERR_FILENO, msg, sizeof(msg) - 1);

                (void)ignored;
        }

        errno = saved_errno;
}
3329 
/*
 * Interrupt a task with a signal while its trapped syscall is pending.
 *
 * The child installs signal_handler for SIGUSR1 and calls the trapped
 * gettid(). The parent receives the notification, sends SIGUSR1, and waits
 * for the handler's byte on the socketpair. Replying to the first
 * notification must then fail with ENOENT; a second notification is
 * received and answered with error -512, which the child expects to see
 * as a -1 return with errno 512.
 */
TEST(user_notification_signal)
{
        pid_t pid;
        long ret;
        int status, listener, sk_pair[2];
        struct seccomp_notif req = {};
        struct seccomp_notif_resp resp = {};
        char c;

        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        ASSERT_EQ(0, ret) {
                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
        }

        /* Channel over which the child's signal handler reports delivery. */
        ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);

        listener = user_trap_syscall(__NR_gettid,
                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
        ASSERT_GE(listener, 0);

        pid = fork();
        ASSERT_GE(pid, 0);

        if (pid == 0) {
                /* Child keeps sk_pair[1] and points the handler at it. */
                close(sk_pair[0]);
                handled = sk_pair[1];
                if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
                        perror("signal");
                        exit(1);
                }
                /*
                 * ERESTARTSYS behavior is a bit hard to test, because we need
                 * to rely on a signal that has not yet been handled. Let's at
                 * least check that the error code gets propagated through, and
                 * hope that it doesn't break when there is actually a signal :)
                 */
                ret = syscall(__NR_gettid);
                /* 512 matches the -512 (-ERESTARTSYS) the parent replies with. */
                exit(!(ret == -1 && errno == 512));
        }

        /* Parent keeps sk_pair[0] only. */
        close(sk_pair[1]);

        memset(&req, 0, sizeof(req));
        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

        EXPECT_EQ(kill(pid, SIGUSR1), 0);

        /*
         * Make sure the signal really is delivered, which means we're not
         * stuck in the user notification code any more and the notification
         * should be dead.
         */
        EXPECT_EQ(read(sk_pair[0], &c, 1), 1);

        /* Reply to the first (now dead) notification: must be ENOENT. */
        resp.id = req.id;
        resp.error = -EPERM;
        resp.val = 0;

        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
        EXPECT_EQ(errno, ENOENT);

        /* A second notification is expected to arrive for the same child. */
        memset(&req, 0, sizeof(req));
        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

        resp.id = req.id;
        resp.error = -512; /* -ERESTARTSYS */
        resp.val = 0;

        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

        EXPECT_EQ(waitpid(pid, &status, 0), pid);
        EXPECT_EQ(true, WIFEXITED(status));
        EXPECT_EQ(0, WEXITSTATUS(status));
}
3404 
3405 TEST(user_notification_closed_listener)
3406 {
3407         pid_t pid;
3408         long ret;
3409         int status, listener;
3410 
3411         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3412         ASSERT_EQ(0, ret) {
3413                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3414         }
3415 
3416         listener = user_trap_syscall(__NR_getppid,
3417                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3418         ASSERT_GE(listener, 0);
3419 
3420         /*
3421          * Check that we get an ENOSYS when the listener is closed.
3422          */
3423         pid = fork();
3424         ASSERT_GE(pid, 0);
3425         if (pid == 0) {
3426                 close(listener);
3427                 ret = syscall(__NR_getppid);
3428                 exit(ret != -1 && errno != ENOSYS);
3429         }
3430 
3431         close(listener);
3432 
3433         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3434         EXPECT_EQ(true, WIFEXITED(status));
3435         EXPECT_EQ(0, WEXITSTATUS(status));
3436 }
3437 
3438 /*
3439  * Check that a pid in a child namespace still shows up as valid in ours.
3440  */
3441 TEST(user_notification_child_pid_ns)
3442 {
3443         pid_t pid;
3444         int status, listener;
3445         struct seccomp_notif req = {};
3446         struct seccomp_notif_resp resp = {};
3447 
             /*
              * NOTE(review): presumably this makes the child forked below live in
              * a new PID namespace while we stay in ours -- confirm against
              * unshare(2).
              */
3448         ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
3449 
             /* Trap getppid; user_trap_syscall() is defined elsewhere in this file. */
3450         listener = user_trap_syscall(__NR_getppid,
3451                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3452         ASSERT_GE(listener, 0);
3453 
3454         pid = fork();
3455         ASSERT_GE(pid, 0);
3456 
             /* Child exits 0 only if the trapped getppid returns USER_NOTIF_MAGIC. */
3457         if (pid == 0)
3458                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3459 
             /* The pid in the notification must be the one fork() returned to us. */
3460         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3461         EXPECT_EQ(req.pid, pid);
3462 
             /* Answer the trapped call with the value the child is waiting for. */
3463         resp.id = req.id;
3464         resp.error = 0;
3465         resp.val = USER_NOTIF_MAGIC;
3466 
3467         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3468 
3469         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3470         EXPECT_EQ(true, WIFEXITED(status));
3471         EXPECT_EQ(0, WEXITSTATUS(status));
3472         close(listener);
3473 }
3474 
3475 /*
3476  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3477  * invalid.
3478  */
3479 TEST(user_notification_sibling_pid_ns)
3480 {
3481         pid_t pid, pid2;
3482         int status, listener;
3483         struct seccomp_notif req = {};
3484         struct seccomp_notif_resp resp = {};
3485 
3486         ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3487                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3488         }
3489 
3490         listener = user_trap_syscall(__NR_getppid,
3491                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3492         ASSERT_GE(listener, 0);
3493 
3494         pid = fork();
3495         ASSERT_GE(pid, 0);
3496 
3497         if (pid == 0) {
3498                 ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3499 
3500                 pid2 = fork();
3501                 ASSERT_GE(pid2, 0);
3502 
3503                 if (pid2 == 0)
3504                         exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3505 
3506                 EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3507                 EXPECT_EQ(true, WIFEXITED(status));
3508                 EXPECT_EQ(0, WEXITSTATUS(status));
3509                 exit(WEXITSTATUS(status));
3510         }
3511 
3512         /* Create the sibling ns, and sibling in it. */
3513         ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3514         ASSERT_EQ(errno, 0);
3515 
3516         pid2 = fork();
3517         ASSERT_GE(pid2, 0);
3518 
3519         if (pid2 == 0) {
3520                 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3521                 /*
3522                  * The pid should be 0, i.e. the task is in some namespace that
3523                  * we can't "see".
3524                  */
3525                 EXPECT_EQ(req.pid, 0);
3526 
3527                 resp.id = req.id;
3528                 resp.error = 0;
3529                 resp.val = USER_NOTIF_MAGIC;
3530 
3531                 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3532                 exit(0);
3533         }
3534 
3535         close(listener);
3536 
3537         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3538         EXPECT_EQ(true, WIFEXITED(status));
3539         EXPECT_EQ(0, WEXITSTATUS(status));
3540 
3541         EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3542         EXPECT_EQ(true, WIFEXITED(status));
3543         EXPECT_EQ(0, WEXITSTATUS(status));
3544 }
3545 
/*
 * Check that a SECCOMP_IOCTL_NOTIF_RECV with a NULL buffer fails with EFAULT
 * and that the pending notification can still be received afterwards.
 */
3546 TEST(user_notification_fault_recv)
3547 {
3548         pid_t pid;
3549         int status, listener;
3550         struct seccomp_notif req = {};
3551         struct seccomp_notif_resp resp = {};
3552 
3553         ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3554 
             /* Trap getppid; user_trap_syscall() is defined elsewhere in this file. */
3555         listener = user_trap_syscall(__NR_getppid,
3556                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3557         ASSERT_GE(listener, 0);
3558 
3559         pid = fork();
3560         ASSERT_GE(pid, 0);
3561 
             /* Child exits 0 only if the trapped getppid returns USER_NOTIF_MAGIC. */
3562         if (pid == 0)
3563                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3564 
3565         /* Do a bad recv() */
3566         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3567         EXPECT_EQ(errno, EFAULT);
3568 
3569         /* We should still be able to receive this notification, though. */
3570         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3571         EXPECT_EQ(req.pid, pid);
3572 
             /* Answer the trapped call with the value the child is waiting for. */
3573         resp.id = req.id;
3574         resp.error = 0;
3575         resp.val = USER_NOTIF_MAGIC;
3576 
3577         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3578 
3579         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3580         EXPECT_EQ(true, WIFEXITED(status));
3581         EXPECT_EQ(0, WEXITSTATUS(status));
3582 }
3583 
/*
 * Check that the sizes reported by SECCOMP_GET_NOTIF_SIZES equal sizeof() of
 * the notification structures this test was compiled against.
 */
3584 TEST(seccomp_get_notif_sizes)
3585 {
             /* Left uninitialized on purpose: filled in by the seccomp() call below. */
3586         struct seccomp_notif_sizes sizes;
3587 
3588         ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
3589         EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
3590         EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
3591 }
3592 
/*
 * Compare two file descriptors through the kcmp syscall in KCMP_FILE mode.
 *
 * @pid1: process owning @fd1
 * @pid2: process owning @fd2
 * @fd1:  descriptor in @pid1
 * @fd2:  descriptor in @pid2
 *
 * Returns whatever the kcmp syscall returns. When no syscall number for kcmp
 * is known at build time, returns -1 with errno set to ENOSYS.
 */
static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifndef __NR_kcmp
        /* No syscall number available: report "not implemented". */
        errno = ENOSYS;
        return -1;
#else
        return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#endif
}
3602 
/*
 * Check SECCOMP_USER_NOTIF_FLAG_CONTINUE: a reply carrying the flag is
 * rejected with EINVAL unless both error and val are 0, and after an accepted
 * reply the child's dup() is expected to yield a descriptor that filecmp()
 * reports as matching the original.
 */
3603 TEST(user_notification_continue)
3604 {
3605         pid_t pid;
3606         long ret;
3607         int status, listener;
3608         struct seccomp_notif req = {};
3609         struct seccomp_notif_resp resp = {};
3610         struct pollfd pollfd;
3611 
3612         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3613         ASSERT_EQ(0, ret) {
3614                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3615         }
3616 
             /* Trap dup; user_trap_syscall() is defined elsewhere in this file. */
3617         listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3618         ASSERT_GE(listener, 0);
3619 
3620         pid = fork();
3621         ASSERT_GE(pid, 0);
3622 
             /*
              * Child exit codes: 1 = pipe() or dup() failed, 2 = filecmp()
              * returned non-zero, 0 = the dup'd descriptor compared equal.
              */
3623         if (pid == 0) {
3624                 int dup_fd, pipe_fds[2];
3625                 pid_t self;
3626 
3627                 ret = pipe(pipe_fds);
3628                 if (ret < 0)
3629                         exit(1);
3630 
                     /* This is the call trapped by the filter installed above. */
3631                 dup_fd = dup(pipe_fds[0]);
3632                 if (dup_fd < 0)
3633                         exit(1);
3634 
3635                 self = getpid();
3636 
3637                 ret = filecmp(self, self, pipe_fds[0], dup_fd);
3638                 if (ret)
3639                         exit(2);
3640 
3641                 exit(0);
3642         }
3643 
             /* Before the notification is received only POLLIN is expected. */
3644         pollfd.fd = listener;
3645         pollfd.events = POLLIN | POLLOUT;
3646 
3647         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3648         EXPECT_EQ(pollfd.revents, POLLIN);
3649 
3650         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3651 
             /* After it has been received, only POLLOUT is expected. */
3652         pollfd.fd = listener;
3653         pollfd.events = POLLIN | POLLOUT;
3654 
3655         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3656         EXPECT_EQ(pollfd.revents, POLLOUT);
3657 
3658         EXPECT_EQ(req.data.nr, __NR_dup);
3659 
3660         resp.id = req.id;
3661         resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3662 
3663         /*
3664          * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
3665          * args be set to 0.
3666          */
3667         resp.error = 0;
3668         resp.val = USER_NOTIF_MAGIC;
3669         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3670         EXPECT_EQ(errno, EINVAL);
3671 
3672         resp.error = USER_NOTIF_MAGIC;
3673         resp.val = 0;
3674         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3675         EXPECT_EQ(errno, EINVAL);
3676 
             /* Well-formed CONTINUE reply: error and val are both 0. */
3677         resp.error = 0;
3678         resp.val = 0;
3679         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3680                 if (errno == EINVAL)
3681                         XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3682         }
3683 
             /*
              * NOTE(review): when the skip path is taken no reply has been sent
              * for the trapped dup(); confirm the waitpid() below cannot block.
              */
3684 skip:
3685         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3686         EXPECT_EQ(true, WIFEXITED(status));
3687         EXPECT_EQ(0, WEXITSTATUS(status)) {
                     /* Exit code 2 is the child's "filecmp() returned non-zero" path. */
3688                 if (WEXITSTATUS(status) == 2) {
                             /*
                              * NOTE(review): the return below looks redundant if
                              * XFAIL() already executes its first argument; the
                              * macro is not visible here -- confirm.
                              */
3689                         XFAIL(return, "Kernel does not support kcmp() syscall");
3690                         return;
3691                 }
3692         }
3693 }
3694 
3695 /*
3696  * TODO:
3697  * - add microbenchmarks
3698  * - expand NNP testing
3699  * - better arch-specific TRACE and TRAP handlers.
3700  * - endianness checking when appropriate
3701  * - 64-bit arg prodding
3702  * - arch value testing (x86 modes especially)
3703  * - verify that FILTER_FLAG_LOG filters generate log messages
3704  * - verify that RET_LOG generates log messages
3705  * - ...
3706  */
3707 
/*
 * NOTE(review): presumably expands to the program entry point that runs the
 * TEST() cases above; the macro is defined outside this file -- confirm.
 */
3708 TEST_HARNESS_MAIN
3709 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp