~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/seccomp/seccomp_bpf.c

Version: ~ [ linux-6.2-rc3 ] ~ [ linux-6.1.5 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.87 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.162 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.228 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.269 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.302 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.302 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
  4  *
  5  * Test code for seccomp bpf.
  6  */
  7 
  8 #define _GNU_SOURCE
  9 #include <sys/types.h>
 10 
 11 /*
 12  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 13  * we need to use the kernel's siginfo.h file and trick glibc
 14  * into accepting it.
 15  */
 16 #if !__GLIBC_PREREQ(2, 26)
 17 # include <asm/siginfo.h>
 18 # define __have_siginfo_t 1
 19 # define __have_sigval_t 1
 20 # define __have_sigevent_t 1
 21 #endif
 22 
 23 #include <errno.h>
 24 #include <linux/filter.h>
 25 #include <sys/prctl.h>
 26 #include <sys/ptrace.h>
 27 #include <sys/user.h>
 28 #include <linux/prctl.h>
 29 #include <linux/ptrace.h>
 30 #include <linux/seccomp.h>
 31 #include <pthread.h>
 32 #include <semaphore.h>
 33 #include <signal.h>
 34 #include <stddef.h>
 35 #include <stdbool.h>
 36 #include <string.h>
 37 #include <time.h>
 38 #include <limits.h>
 39 #include <linux/elf.h>
 40 #include <sys/uio.h>
 41 #include <sys/utsname.h>
 42 #include <sys/fcntl.h>
 43 #include <sys/mman.h>
 44 #include <sys/times.h>
 45 #include <sys/socket.h>
 46 #include <sys/ioctl.h>
 47 #include <linux/kcmp.h>
 48 #include <sys/resource.h>
 49 
 50 #include <unistd.h>
 51 #include <sys/syscall.h>
 52 #include <poll.h>
 53 
 54 #include "../kselftest_harness.h"
 55 #include "../clone3/clone3_selftests.h"
 56 
 57 /* Attempt to de-conflict with the selftests tree. */
 58 #ifndef SKIP
 59 #define SKIP(s, ...)    XFAIL(s, ##__VA_ARGS__)
 60 #endif
 61 
 62 #ifndef PR_SET_PTRACER
 63 # define PR_SET_PTRACER 0x59616d61
 64 #endif
 65 
 66 #ifndef PR_SET_NO_NEW_PRIVS
 67 #define PR_SET_NO_NEW_PRIVS 38
 68 #define PR_GET_NO_NEW_PRIVS 39
 69 #endif
 70 
 71 #ifndef PR_SECCOMP_EXT
 72 #define PR_SECCOMP_EXT 43
 73 #endif
 74 
 75 #ifndef SECCOMP_EXT_ACT
 76 #define SECCOMP_EXT_ACT 1
 77 #endif
 78 
 79 #ifndef SECCOMP_EXT_ACT_TSYNC
 80 #define SECCOMP_EXT_ACT_TSYNC 1
 81 #endif
 82 
 83 #ifndef SECCOMP_MODE_STRICT
 84 #define SECCOMP_MODE_STRICT 1
 85 #endif
 86 
 87 #ifndef SECCOMP_MODE_FILTER
 88 #define SECCOMP_MODE_FILTER 2
 89 #endif
 90 
 91 #ifndef SECCOMP_RET_ALLOW
 92 struct seccomp_data {
 93         int nr;
 94         __u32 arch;
 95         __u64 instruction_pointer;
 96         __u64 args[6];
 97 };
 98 #endif
 99 
100 #ifndef SECCOMP_RET_KILL_PROCESS
101 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
102 #define SECCOMP_RET_KILL_THREAD  0x00000000U /* kill the thread */
103 #endif
104 #ifndef SECCOMP_RET_KILL
105 #define SECCOMP_RET_KILL         SECCOMP_RET_KILL_THREAD
106 #define SECCOMP_RET_TRAP         0x00030000U /* disallow and force a SIGSYS */
107 #define SECCOMP_RET_ERRNO        0x00050000U /* returns an errno */
108 #define SECCOMP_RET_TRACE        0x7ff00000U /* pass to a tracer or disallow */
109 #define SECCOMP_RET_ALLOW        0x7fff0000U /* allow */
110 #endif
111 #ifndef SECCOMP_RET_LOG
112 #define SECCOMP_RET_LOG          0x7ffc0000U /* allow after logging */
113 #endif
114 
115 #ifndef __NR_seccomp
116 # if defined(__i386__)
117 #  define __NR_seccomp 354
118 # elif defined(__x86_64__)
119 #  define __NR_seccomp 317
120 # elif defined(__arm__)
121 #  define __NR_seccomp 383
122 # elif defined(__aarch64__)
123 #  define __NR_seccomp 277
124 # elif defined(__riscv)
125 #  define __NR_seccomp 277
126 # elif defined(__csky__)
127 #  define __NR_seccomp 277
128 # elif defined(__hppa__)
129 #  define __NR_seccomp 338
130 # elif defined(__powerpc__)
131 #  define __NR_seccomp 358
132 # elif defined(__s390__)
133 #  define __NR_seccomp 348
134 # elif defined(__xtensa__)
135 #  define __NR_seccomp 337
136 # elif defined(__sh__)
137 #  define __NR_seccomp 372
138 # else
139 #  warning "seccomp syscall number unknown for this architecture"
140 #  define __NR_seccomp 0xffff
141 # endif
142 #endif
143 
144 #ifndef SECCOMP_SET_MODE_STRICT
145 #define SECCOMP_SET_MODE_STRICT 0
146 #endif
147 
148 #ifndef SECCOMP_SET_MODE_FILTER
149 #define SECCOMP_SET_MODE_FILTER 1
150 #endif
151 
152 #ifndef SECCOMP_GET_ACTION_AVAIL
153 #define SECCOMP_GET_ACTION_AVAIL 2
154 #endif
155 
156 #ifndef SECCOMP_GET_NOTIF_SIZES
157 #define SECCOMP_GET_NOTIF_SIZES 3
158 #endif
159 
160 #ifndef SECCOMP_FILTER_FLAG_TSYNC
161 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
162 #endif
163 
164 #ifndef SECCOMP_FILTER_FLAG_LOG
165 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
166 #endif
167 
168 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
169 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
170 #endif
171 
172 #ifndef PTRACE_SECCOMP_GET_METADATA
173 #define PTRACE_SECCOMP_GET_METADATA     0x420d
174 
175 struct seccomp_metadata {
176         __u64 filter_off;       /* Input: which filter */
177         __u64 flags;             /* Output: filter's flags */
178 };
179 #endif
180 
181 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
182 #define SECCOMP_FILTER_FLAG_NEW_LISTENER        (1UL << 3)
183 #endif
184 
185 #ifndef SECCOMP_RET_USER_NOTIF
186 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
187 
188 #define SECCOMP_IOC_MAGIC               '!'
189 #define SECCOMP_IO(nr)                  _IO(SECCOMP_IOC_MAGIC, nr)
190 #define SECCOMP_IOR(nr, type)           _IOR(SECCOMP_IOC_MAGIC, nr, type)
191 #define SECCOMP_IOW(nr, type)           _IOW(SECCOMP_IOC_MAGIC, nr, type)
192 #define SECCOMP_IOWR(nr, type)          _IOWR(SECCOMP_IOC_MAGIC, nr, type)
193 
194 /* Flags for seccomp notification fd ioctl. */
195 #define SECCOMP_IOCTL_NOTIF_RECV        SECCOMP_IOWR(0, struct seccomp_notif)
196 #define SECCOMP_IOCTL_NOTIF_SEND        SECCOMP_IOWR(1, \
197                                                 struct seccomp_notif_resp)
198 #define SECCOMP_IOCTL_NOTIF_ID_VALID    SECCOMP_IOW(2, __u64)
199 
200 struct seccomp_notif {
201         __u64 id;
202         __u32 pid;
203         __u32 flags;
204         struct seccomp_data data;
205 };
206 
207 struct seccomp_notif_resp {
208         __u64 id;
209         __s64 val;
210         __s32 error;
211         __u32 flags;
212 };
213 
214 struct seccomp_notif_sizes {
215         __u16 seccomp_notif;
216         __u16 seccomp_notif_resp;
217         __u16 seccomp_data;
218 };
219 #endif
220 
221 #ifndef SECCOMP_IOCTL_NOTIF_ADDFD
222 /* On success, the return value is the remote process's added fd number */
223 #define SECCOMP_IOCTL_NOTIF_ADDFD       SECCOMP_IOW(3,  \
224                                                 struct seccomp_notif_addfd)
225 
226 /* valid flags for seccomp_notif_addfd */
227 #define SECCOMP_ADDFD_FLAG_SETFD        (1UL << 0) /* Specify remote fd */
228 
229 struct seccomp_notif_addfd {
230         __u64 id;
231         __u32 flags;
232         __u32 srcfd;
233         __u32 newfd;
234         __u32 newfd_flags;
235 };
236 #endif
237 
238 #ifndef SECCOMP_ADDFD_FLAG_SEND
239 #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
240 #endif
241 
/*
 * Deliberately undersized variant of struct seccomp_notif_addfd: just the
 * 64-bit notification id followed by four filler bytes.
 */
struct seccomp_notif_addfd_small {
	__u64 id;
	char weird[4];
};
/* Uses ioctl number 3, like SECCOMP_IOCTL_NOTIF_ADDFD, but with the small struct as payload type. */
#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL	\
	SECCOMP_IOW(3, struct seccomp_notif_addfd_small)
248 
/*
 * Oversized variant of struct seccomp_notif_addfd: the real structure
 * overlaid with a byte buffer that is eight bytes longer.
 */
struct seccomp_notif_addfd_big {
	union {
		struct seccomp_notif_addfd addfd;
		char buf[sizeof(struct seccomp_notif_addfd) + 8];
	};
};
/* Uses ioctl number 3 with the oversized payload type; built with SECCOMP_IOWR, not SECCOMP_IOW. */
#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG	\
	SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
257 
258 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
259 #define PTRACE_EVENTMSG_SYSCALL_ENTRY   1
260 #define PTRACE_EVENTMSG_SYSCALL_EXIT    2
261 #endif
262 
263 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
264 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
265 #endif
266 
267 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
268 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
269 #endif
270 
#ifndef seccomp
/*
 * Invoke the raw seccomp system call (number __NR_seccomp).
 *
 * errno is cleared before the call, so a non-zero errno afterwards was
 * set by this invocation. Returns the syscall() result converted to int.
 * Compiled only when no macro named seccomp is already defined.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);

	return rc;
}
#endif
278 
/*
 * syscall_arg(n): byte offset inside struct seccomp_data at which a 32-bit
 * BPF load picks up the low half of the 64-bit args[n]. On big-endian
 * machines the low word sits sizeof(__u32) bytes past the start of the
 * argument; on little-endian machines it is the start of the argument.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
# error "wut? Unknown __BYTE_ORDER?!"
#endif
286 
287 #define SIBLING_EXIT_UNKILLED   0xbadbeef
288 #define SIBLING_EXIT_FAILURE    0xbadface
289 #define SIBLING_EXIT_NEWPRIVS   0xbadfeed
290 
/*
 * Compare file descriptor fd1 of process pid1 with fd2 of process pid2
 * using the kcmp syscall in KCMP_FILE mode.
 *
 * Returns the raw syscall result (errno is cleared beforehand). When the
 * build has no __NR_kcmp, returns -1 with errno set to ENOSYS.
 */
static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifndef __NR_kcmp
	errno = ENOSYS;
	return -1;
#else
	errno = 0;
	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#endif
}
301 
/*
 * Macro front-end for __filecmp() so that TH_LOG reports the line of the
 * caller. A missing kcmp() syscall (negative result with ENOSYS) is logged
 * and treated as "files match" (0); every other result is passed through.
 */
#define filecmp(pid1, pid2, fd1, fd2)	({				\
	int _ret = __filecmp(pid1, pid2, fd1, fd2);			\
									\
	if (_ret < 0 && errno == ENOSYS) {				\
		TH_LOG("kcmp() syscall missing (test is less accurate)");\
		_ret = 0;						\
	}								\
	_ret; })
314 
315 TEST(kcmp)
316 {
317         int ret;
318 
319         ret = __filecmp(getpid(), getpid(), 1, 1);
320         EXPECT_EQ(ret, 0);
321         if (ret != 0 && errno == ENOSYS)
322                 SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
323 }
324 
325 TEST(mode_strict_support)
326 {
327         long ret;
328 
329         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
330         ASSERT_EQ(0, ret) {
331                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
332         }
333         syscall(__NR_exit, 0);
334 }
335 
336 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
337 {
338         long ret;
339 
340         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
341         ASSERT_EQ(0, ret) {
342                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
343         }
344         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
345                 NULL, NULL, NULL);
346         EXPECT_FALSE(true) {
347                 TH_LOG("Unreachable!");
348         }
349 }
350 
351 /* Note! This doesn't test no new privs behavior */
352 TEST(no_new_privs_support)
353 {
354         long ret;
355 
356         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
357         EXPECT_EQ(0, ret) {
358                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
359         }
360 }
361 
362 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
363 TEST(mode_filter_support)
364 {
365         long ret;
366 
367         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
368         ASSERT_EQ(0, ret) {
369                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
370         }
371         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
372         EXPECT_EQ(-1, ret);
373         EXPECT_EQ(EFAULT, errno) {
374                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
375         }
376 }
377 
378 TEST(mode_filter_without_nnp)
379 {
380         struct sock_filter filter[] = {
381                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
382         };
383         struct sock_fprog prog = {
384                 .len = (unsigned short)ARRAY_SIZE(filter),
385                 .filter = filter,
386         };
387         long ret;
388 
389         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
390         ASSERT_LE(0, ret) {
391                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
392         }
393         errno = 0;
394         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
395         /* Succeeds with CAP_SYS_ADMIN, fails without */
396         /* TODO(wad) check caps not euid */
397         if (geteuid()) {
398                 EXPECT_EQ(-1, ret);
399                 EXPECT_EQ(EACCES, errno);
400         } else {
401                 EXPECT_EQ(0, ret);
402         }
403 }
404 
405 #define MAX_INSNS_PER_PATH 32768
406 
407 TEST(filter_size_limits)
408 {
409         int i;
410         int count = BPF_MAXINSNS + 1;
411         struct sock_filter allow[] = {
412                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
413         };
414         struct sock_filter *filter;
415         struct sock_fprog prog = { };
416         long ret;
417 
418         filter = calloc(count, sizeof(*filter));
419         ASSERT_NE(NULL, filter);
420 
421         for (i = 0; i < count; i++)
422                 filter[i] = allow[0];
423 
424         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
425         ASSERT_EQ(0, ret);
426 
427         prog.filter = filter;
428         prog.len = count;
429 
430         /* Too many filter instructions in a single filter. */
431         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
432         ASSERT_NE(0, ret) {
433                 TH_LOG("Installing %d insn filter was allowed", prog.len);
434         }
435 
436         /* One less is okay, though. */
437         prog.len -= 1;
438         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
439         ASSERT_EQ(0, ret) {
440                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
441         }
442 }
443 
444 TEST(filter_chain_limits)
445 {
446         int i;
447         int count = BPF_MAXINSNS;
448         struct sock_filter allow[] = {
449                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
450         };
451         struct sock_filter *filter;
452         struct sock_fprog prog = { };
453         long ret;
454 
455         filter = calloc(count, sizeof(*filter));
456         ASSERT_NE(NULL, filter);
457 
458         for (i = 0; i < count; i++)
459                 filter[i] = allow[0];
460 
461         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
462         ASSERT_EQ(0, ret);
463 
464         prog.filter = filter;
465         prog.len = 1;
466 
467         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
468         ASSERT_EQ(0, ret);
469 
470         prog.len = count;
471 
472         /* Too many total filter instructions. */
473         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
474                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
475                 if (ret != 0)
476                         break;
477         }
478         ASSERT_NE(0, ret) {
479                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
480                        i, count, i * (count + 4));
481         }
482 }
483 
/*
 * With a filter already installed, a request to switch to strict mode
 * must fail with EINVAL.
 */
TEST(mode_filter_cannot_move_to_strict)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
505 
506 
/*
 * PR_GET_SECCOMP must report 0 before any filter is installed and 2 once
 * an allow-all filter has been loaded.
 */
TEST(mode_filter_get_seccomp)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	/* Mode before installing anything. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Mode with the filter in place. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(2, ret);
}
530 
531 
/* A one-instruction filter that allows everything must install cleanly. */
TEST(ALLOW_all)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}
549 
/* A program containing zero instructions must be refused with EINVAL. */
TEST(empty_prog)
{
	struct sock_filter filter[] = {
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
567 
/*
 * With a filter that returns SECCOMP_RET_LOG for every syscall, getppid()
 * must still work. Whether anything was actually logged is not verified.
 */
TEST(log_all)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	/* The raw syscall must report the parent observed before the filter. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
}
589 
590 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
591 {
592         struct sock_filter filter[] = {
593                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
594         };
595         struct sock_fprog prog = {
596                 .len = (unsigned short)ARRAY_SIZE(filter),
597                 .filter = filter,
598         };
599         long ret;
600 
601         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
602         ASSERT_EQ(0, ret);
603 
604         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
605         ASSERT_EQ(0, ret);
606         EXPECT_EQ(0, syscall(__NR_getpid)) {
607                 TH_LOG("getpid() shouldn't ever return");
608         }
609 }
610 
611 /* return code >= 0x80000000 is unused. */
612 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
613 {
614         struct sock_filter filter[] = {
615                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
616         };
617         struct sock_fprog prog = {
618                 .len = (unsigned short)ARRAY_SIZE(filter),
619                 .filter = filter,
620         };
621         long ret;
622 
623         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
624         ASSERT_EQ(0, ret);
625 
626         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
627         ASSERT_EQ(0, ret);
628         EXPECT_EQ(0, syscall(__NR_getpid)) {
629                 TH_LOG("getpid() shouldn't ever return");
630         }
631 }
632 
/*
 * Install a filter that returns SECCOMP_RET_KILL for every syscall; the
 * test is expected to terminate with SIGSYS.
 */
TEST_SIGNAL(KILL_all, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}
650 
/*
 * Kill only on getpid(): other syscalls (getppid() here) keep working,
 * and the subsequent getpid() is expected to raise SIGSYS.
 */
TEST_SIGNAL(KILL_one, SIGSYS)
{
	struct sock_filter filter[] = {
		/* Load the syscall number and kill if it is getpid. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	/* An unrelated syscall passes through the filter. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
677 
/*
 * Kill only when times() is called with one specific first argument (the
 * address of a local variable); times() with another buffer must work.
 */
TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
{
	void *fatal_address;
	struct sock_filter filter[] = {
		/* Anything other than times() is allowed straight away. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only the lower 32 bits of the argument are compared for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
			(unsigned long)&fatal_address, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret;
	pid_t parent = getppid();
	struct tms timebuf;
	clock_t clock = times(&timebuf);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* times() with a harmless buffer is still permitted. */
	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
	/* times() should never return. */
	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
}
713 
/*
 * Kill only when the sixth argument of mmap (mmap2 where that syscall
 * exists) equals 0x0C0FFEE; a mapping with another offset must succeed.
 */
TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
{
#ifdef __NR_mmap2
	int sysno = __NR_mmap2;
#else
	int sysno = __NR_mmap;
#endif
	struct sock_filter filter[] = {
		/* Anything other than the mmap syscall is allowed. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only the lower 32 bits of the argument are compared for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret;
	pid_t parent = getppid();
	int fd;
	void *map1, *map2;
	int page_size = sysconf(_SC_PAGESIZE);

	ASSERT_LT(0, page_size);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	fd = open("/dev/zero", O_RDONLY);
	ASSERT_NE(-1, fd);

	EXPECT_EQ(parent, syscall(__NR_getppid));

	/* A different sixth argument must be let through. */
	map1 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
	EXPECT_NE(MAP_FAILED, map1);

	/* mmap2() should never return. */
	map2 = (void *)syscall(sysno,
		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
	EXPECT_EQ(MAP_FAILED, map2);

	/* The test failed, so clean up the resources. */
	munmap(map1, page_size);
	munmap(map2, page_size);
	close(fd);
}
767 
768 /* This is a thread task to die via seccomp filter violation. */
769 void *kill_thread(void *data)
770 {
771         bool die = (bool)data;
772 
773         if (die) {
774                 prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
775                 return (void *)SIBLING_EXIT_FAILURE;
776         }
777 
778         return (void *)SIBLING_EXIT_UNKILLED;
779 }
780 
/* Selects which filter action kill_thread_or_group() installs for prctl(). */
enum kill_t {
	KILL_THREAD,	/* SECCOMP_RET_KILL_THREAD only */
	KILL_PROCESS,	/* SECCOMP_RET_KILL_PROCESS, then KILL_THREAD on top */
	RET_UNKNOWN	/* a return value that is not a known action */
};
786 
787 /* Prepare a thread that will kill itself or both of us. */
788 void kill_thread_or_group(struct __test_metadata *_metadata,
789                           enum kill_t kill_how)
790 {
791         pthread_t thread;
792         void *status;
793         /* Kill only when calling __NR_prctl. */
794         struct sock_filter filter_thread[] = {
795                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
796                         offsetof(struct seccomp_data, nr)),
797                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
798                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
799                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
800         };
801         struct sock_fprog prog_thread = {
802                 .len = (unsigned short)ARRAY_SIZE(filter_thread),
803                 .filter = filter_thread,
804         };
805         int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA;
806         struct sock_filter filter_process[] = {
807                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
808                         offsetof(struct seccomp_data, nr)),
809                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
810                 BPF_STMT(BPF_RET|BPF_K, kill),
811                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
812         };
813         struct sock_fprog prog_process = {
814                 .len = (unsigned short)ARRAY_SIZE(filter_process),
815                 .filter = filter_process,
816         };
817 
818         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
819                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
820         }
821 
822         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
823                              kill_how == KILL_THREAD ? &prog_thread
824                                                      : &prog_process));
825 
826         /*
827          * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
828          * flag cannot be downgraded by a new filter.
829          */
830         if (kill_how == KILL_PROCESS)
831                 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
832 
833         /* Start a thread that will exit immediately. */
834         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
835         ASSERT_EQ(0, pthread_join(thread, &status));
836         ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
837 
838         /* Start a thread that will die immediately. */
839         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
840         ASSERT_EQ(0, pthread_join(thread, &status));
841         ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
842 
843         /*
844          * If we get here, only the spawned thread died. Let the parent know
845          * the whole process didn't die (i.e. this thread, the spawner,
846          * stayed running).
847          */
848         exit(42);
849 }
850 
851 TEST(KILL_thread)
852 {
853         int status;
854         pid_t child_pid;
855 
856         child_pid = fork();
857         ASSERT_LE(0, child_pid);
858         if (child_pid == 0) {
859                 kill_thread_or_group(_metadata, KILL_THREAD);
860                 _exit(38);
861         }
862 
863         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
864 
865         /* If only the thread was killed, we'll see exit 42. */
866         ASSERT_TRUE(WIFEXITED(status));
867         ASSERT_EQ(42, WEXITSTATUS(status));
868 }
869 
870 TEST(KILL_process)
871 {
872         int status;
873         pid_t child_pid;
874 
875         child_pid = fork();
876         ASSERT_LE(0, child_pid);
877         if (child_pid == 0) {
878                 kill_thread_or_group(_metadata, KILL_PROCESS);
879                 _exit(38);
880         }
881 
882         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
883 
884         /* If the entire process was killed, we'll see SIGSYS. */
885         ASSERT_TRUE(WIFSIGNALED(status));
886         ASSERT_EQ(SIGSYS, WTERMSIG(status));
887 }
888 
889 TEST(KILL_unknown)
890 {
891         int status;
892         pid_t child_pid;
893 
894         child_pid = fork();
895         ASSERT_LE(0, child_pid);
896         if (child_pid == 0) {
897                 kill_thread_or_group(_metadata, RET_UNKNOWN);
898                 _exit(38);
899         }
900 
901         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
902 
903         /* If the entire process was killed, we'll see SIGSYS. */
904         EXPECT_TRUE(WIFSIGNALED(status)) {
905                 TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
906         }
907         ASSERT_EQ(SIGSYS, WTERMSIG(status));
908 }
909 
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
/*
 * A filter that loads from syscall_arg(6), one slot past the six-entry
 * args[] array of struct seccomp_data, must be rejected with EINVAL.
 */
TEST(arg_out_of_range)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
930 
/*
 * ERRNO_FILTER(name, err_val): declare the filter array
 * _read_filter_<name> and the matching program prog_<name>. The filter
 * returns SECCOMP_RET_ERRNO | err_val for read() and allows every other
 * syscall.
 */
#define ERRNO_FILTER(name, err_val)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | err_val),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
943 
944 /* Make sure basic errno values are correctly passed through a filter. */
945 TEST(ERRNO_valid)
946 {
947         ERRNO_FILTER(valid, E2BIG);
948         long ret;
949         pid_t parent = getppid();
950 
951         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
952         ASSERT_EQ(0, ret);
953 
954         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
955         ASSERT_EQ(0, ret);
956 
957         EXPECT_EQ(parent, syscall(__NR_getppid));
958         EXPECT_EQ(-1, read(0, NULL, 0));
959         EXPECT_EQ(E2BIG, errno);
960 }
961 
962 /* Make sure an errno of zero is correctly handled by the arch code. */
963 TEST(ERRNO_zero)
964 {
965         ERRNO_FILTER(zero, 0);
966         long ret;
967         pid_t parent = getppid();
968 
969         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
970         ASSERT_EQ(0, ret);
971 
972         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
973         ASSERT_EQ(0, ret);
974 
975         EXPECT_EQ(parent, syscall(__NR_getppid));
976         /* "errno" of 0 is ok. */
977         EXPECT_EQ(0, read(0, NULL, 0));
978 }
979 
980 /*
981  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
982  * This tests that the errno value gets capped correctly, fixed by
983  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
984  */
985 TEST(ERRNO_capped)
986 {
987         ERRNO_FILTER(capped, 4096);
988         long ret;
989         pid_t parent = getppid();
990 
991         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
992         ASSERT_EQ(0, ret);
993 
994         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
995         ASSERT_EQ(0, ret);
996 
997         EXPECT_EQ(parent, syscall(__NR_getppid));
998         EXPECT_EQ(-1, read(0, NULL, 0));
999         EXPECT_EQ(4095, errno);
1000 }
1001 
1002 /*
1003  * Filters are processed in reverse order: last applied is executed first.
1004  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
1005  * SECCOMP_RET_DATA mask results will follow the most recently applied
1006  * matching filter return (and not the lowest or highest value).
1007  */
1008 TEST(ERRNO_order)
1009 {
1010         ERRNO_FILTER(first,  11);
1011         ERRNO_FILTER(second, 13);
1012         ERRNO_FILTER(third,  12);
1013         long ret;
1014         pid_t parent = getppid();
1015 
1016         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1017         ASSERT_EQ(0, ret);
1018 
1019         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
1020         ASSERT_EQ(0, ret);
1021 
1022         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
1023         ASSERT_EQ(0, ret);
1024 
1025         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
1026         ASSERT_EQ(0, ret);
1027 
1028         EXPECT_EQ(parent, syscall(__NR_getppid));
1029         EXPECT_EQ(-1, read(0, NULL, 0));
1030         EXPECT_EQ(12, errno);
1031 }
1032 
/* Per-test state for the TRAP tests. */
FIXTURE(TRAP) {
	/* Filter program; its instruction buffer is heap-allocated in setup. */
	struct sock_fprog prog;
};
1036 
1037 FIXTURE_SETUP(TRAP)
1038 {
1039         struct sock_filter filter[] = {
1040                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1041                         offsetof(struct seccomp_data, nr)),
1042                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1043                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1044                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1045         };
1046 
1047         memset(&self->prog, 0, sizeof(self->prog));
1048         self->prog.filter = malloc(sizeof(filter));
1049         ASSERT_NE(NULL, self->prog.filter);
1050         memcpy(self->prog.filter, filter, sizeof(filter));
1051         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1052 }
1053 
1054 FIXTURE_TEARDOWN(TRAP)
1055 {
1056         if (self->prog.filter)
1057                 free(self->prog.filter);
1058 }
1059 
1060 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
1061 {
1062         long ret;
1063 
1064         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1065         ASSERT_EQ(0, ret);
1066 
1067         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1068         ASSERT_EQ(0, ret);
1069         syscall(__NR_getpid);
1070 }
1071 
1072 /* Ensure that SIGSYS overrides SIG_IGN */
1073 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
1074 {
1075         long ret;
1076 
1077         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1078         ASSERT_EQ(0, ret);
1079 
1080         signal(SIGSYS, SIG_IGN);
1081 
1082         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1083         ASSERT_EQ(0, ret);
1084         syscall(__NR_getpid);
1085 }
1086 
1087 static siginfo_t TRAP_info;
1088 static volatile int TRAP_nr;
1089 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
1090 {
1091         memcpy(&TRAP_info, info, sizeof(TRAP_info));
1092         TRAP_nr = nr;
1093 }
1094 
1095 TEST_F(TRAP, handler)
1096 {
1097         int ret, test;
1098         struct sigaction act;
1099         sigset_t mask;
1100 
1101         memset(&act, 0, sizeof(act));
1102         sigemptyset(&mask);
1103         sigaddset(&mask, SIGSYS);
1104 
1105         act.sa_sigaction = &TRAP_action;
1106         act.sa_flags = SA_SIGINFO;
1107         ret = sigaction(SIGSYS, &act, NULL);
1108         ASSERT_EQ(0, ret) {
1109                 TH_LOG("sigaction failed");
1110         }
1111         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
1112         ASSERT_EQ(0, ret) {
1113                 TH_LOG("sigprocmask failed");
1114         }
1115 
1116         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1117         ASSERT_EQ(0, ret);
1118         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1119         ASSERT_EQ(0, ret);
1120         TRAP_nr = 0;
1121         memset(&TRAP_info, 0, sizeof(TRAP_info));
1122         /* Expect the registers to be rolled back. (nr = error) may vary
1123          * based on arch. */
1124         ret = syscall(__NR_getpid);
1125         /* Silence gcc warning about volatile. */
1126         test = TRAP_nr;
1127         EXPECT_EQ(SIGSYS, test);
1128         struct local_sigsys {
1129                 void *_call_addr;       /* calling user insn */
1130                 int _syscall;           /* triggering system call number */
1131                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
1132         } *sigsys = (struct local_sigsys *)
1133 #ifdef si_syscall
1134                 &(TRAP_info.si_call_addr);
1135 #else
1136                 &TRAP_info.si_pid;
1137 #endif
1138         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1139         /* Make sure arch is non-zero. */
1140         EXPECT_NE(0, sigsys->_arch);
1141         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1142 }
1143 
/*
 * One filter program per seccomp return action. Each program's
 * instruction buffer is heap-allocated by the fixture setup.
 */
FIXTURE(precedence) {
	struct sock_fprog allow;	/* SECCOMP_RET_ALLOW for everything */
	struct sock_fprog log;		/* SECCOMP_RET_LOG on getpid() */
	struct sock_fprog trace;	/* SECCOMP_RET_TRACE on getpid() */
	struct sock_fprog error;	/* SECCOMP_RET_ERRNO on getpid() */
	struct sock_fprog trap;		/* SECCOMP_RET_TRAP on getpid() */
	struct sock_fprog kill;		/* SECCOMP_RET_KILL on getpid() */
};
1152 
1153 FIXTURE_SETUP(precedence)
1154 {
1155         struct sock_filter allow_insns[] = {
1156                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1157         };
1158         struct sock_filter log_insns[] = {
1159                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1160                         offsetof(struct seccomp_data, nr)),
1161                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1162                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1163                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1164         };
1165         struct sock_filter trace_insns[] = {
1166                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1167                         offsetof(struct seccomp_data, nr)),
1168                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1169                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1170                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1171         };
1172         struct sock_filter error_insns[] = {
1173                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1174                         offsetof(struct seccomp_data, nr)),
1175                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1176                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1177                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1178         };
1179         struct sock_filter trap_insns[] = {
1180                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1181                         offsetof(struct seccomp_data, nr)),
1182                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1183                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1184                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1185         };
1186         struct sock_filter kill_insns[] = {
1187                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1188                         offsetof(struct seccomp_data, nr)),
1189                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1190                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1191                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1192         };
1193 
1194         memset(self, 0, sizeof(*self));
1195 #define FILTER_ALLOC(_x) \
1196         self->_x.filter = malloc(sizeof(_x##_insns)); \
1197         ASSERT_NE(NULL, self->_x.filter); \
1198         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1199         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1200         FILTER_ALLOC(allow);
1201         FILTER_ALLOC(log);
1202         FILTER_ALLOC(trace);
1203         FILTER_ALLOC(error);
1204         FILTER_ALLOC(trap);
1205         FILTER_ALLOC(kill);
1206 }
1207 
1208 FIXTURE_TEARDOWN(precedence)
1209 {
1210 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1211         FILTER_FREE(allow);
1212         FILTER_FREE(log);
1213         FILTER_FREE(trace);
1214         FILTER_FREE(error);
1215         FILTER_FREE(trap);
1216         FILTER_FREE(kill);
1217 }
1218 
1219 TEST_F(precedence, allow_ok)
1220 {
1221         pid_t parent, res = 0;
1222         long ret;
1223 
1224         parent = getppid();
1225         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1226         ASSERT_EQ(0, ret);
1227 
1228         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1229         ASSERT_EQ(0, ret);
1230         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1231         ASSERT_EQ(0, ret);
1232         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1233         ASSERT_EQ(0, ret);
1234         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1235         ASSERT_EQ(0, ret);
1236         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1237         ASSERT_EQ(0, ret);
1238         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1239         ASSERT_EQ(0, ret);
1240         /* Should work just fine. */
1241         res = syscall(__NR_getppid);
1242         EXPECT_EQ(parent, res);
1243 }
1244 
1245 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1246 {
1247         pid_t parent, res = 0;
1248         long ret;
1249 
1250         parent = getppid();
1251         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1252         ASSERT_EQ(0, ret);
1253 
1254         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1255         ASSERT_EQ(0, ret);
1256         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1257         ASSERT_EQ(0, ret);
1258         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1259         ASSERT_EQ(0, ret);
1260         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1261         ASSERT_EQ(0, ret);
1262         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1263         ASSERT_EQ(0, ret);
1264         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1265         ASSERT_EQ(0, ret);
1266         /* Should work just fine. */
1267         res = syscall(__NR_getppid);
1268         EXPECT_EQ(parent, res);
1269         /* getpid() should never return. */
1270         res = syscall(__NR_getpid);
1271         EXPECT_EQ(0, res);
1272 }
1273 
1274 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1275 {
1276         pid_t parent;
1277         long ret;
1278 
1279         parent = getppid();
1280         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1281         ASSERT_EQ(0, ret);
1282 
1283         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1284         ASSERT_EQ(0, ret);
1285         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1286         ASSERT_EQ(0, ret);
1287         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1288         ASSERT_EQ(0, ret);
1289         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1290         ASSERT_EQ(0, ret);
1291         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1292         ASSERT_EQ(0, ret);
1293         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1294         ASSERT_EQ(0, ret);
1295         /* Should work just fine. */
1296         EXPECT_EQ(parent, syscall(__NR_getppid));
1297         /* getpid() should never return. */
1298         EXPECT_EQ(0, syscall(__NR_getpid));
1299 }
1300 
1301 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1302 {
1303         pid_t parent;
1304         long ret;
1305 
1306         parent = getppid();
1307         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1308         ASSERT_EQ(0, ret);
1309 
1310         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1311         ASSERT_EQ(0, ret);
1312         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1313         ASSERT_EQ(0, ret);
1314         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1315         ASSERT_EQ(0, ret);
1316         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1317         ASSERT_EQ(0, ret);
1318         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1319         ASSERT_EQ(0, ret);
1320         /* Should work just fine. */
1321         EXPECT_EQ(parent, syscall(__NR_getppid));
1322         /* getpid() should never return. */
1323         EXPECT_EQ(0, syscall(__NR_getpid));
1324 }
1325 
1326 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1327 {
1328         pid_t parent;
1329         long ret;
1330 
1331         parent = getppid();
1332         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1333         ASSERT_EQ(0, ret);
1334 
1335         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1336         ASSERT_EQ(0, ret);
1337         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1338         ASSERT_EQ(0, ret);
1339         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1340         ASSERT_EQ(0, ret);
1341         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1342         ASSERT_EQ(0, ret);
1343         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1344         ASSERT_EQ(0, ret);
1345         /* Should work just fine. */
1346         EXPECT_EQ(parent, syscall(__NR_getppid));
1347         /* getpid() should never return. */
1348         EXPECT_EQ(0, syscall(__NR_getpid));
1349 }
1350 
1351 TEST_F(precedence, errno_is_third)
1352 {
1353         pid_t parent;
1354         long ret;
1355 
1356         parent = getppid();
1357         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1358         ASSERT_EQ(0, ret);
1359 
1360         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1361         ASSERT_EQ(0, ret);
1362         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1363         ASSERT_EQ(0, ret);
1364         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1365         ASSERT_EQ(0, ret);
1366         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1367         ASSERT_EQ(0, ret);
1368         /* Should work just fine. */
1369         EXPECT_EQ(parent, syscall(__NR_getppid));
1370         EXPECT_EQ(0, syscall(__NR_getpid));
1371 }
1372 
1373 TEST_F(precedence, errno_is_third_in_any_order)
1374 {
1375         pid_t parent;
1376         long ret;
1377 
1378         parent = getppid();
1379         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1380         ASSERT_EQ(0, ret);
1381 
1382         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1383         ASSERT_EQ(0, ret);
1384         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1385         ASSERT_EQ(0, ret);
1386         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1387         ASSERT_EQ(0, ret);
1388         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1389         ASSERT_EQ(0, ret);
1390         /* Should work just fine. */
1391         EXPECT_EQ(parent, syscall(__NR_getppid));
1392         EXPECT_EQ(0, syscall(__NR_getpid));
1393 }
1394 
1395 TEST_F(precedence, trace_is_fourth)
1396 {
1397         pid_t parent;
1398         long ret;
1399 
1400         parent = getppid();
1401         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1402         ASSERT_EQ(0, ret);
1403 
1404         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1405         ASSERT_EQ(0, ret);
1406         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1407         ASSERT_EQ(0, ret);
1408         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1409         ASSERT_EQ(0, ret);
1410         /* Should work just fine. */
1411         EXPECT_EQ(parent, syscall(__NR_getppid));
1412         /* No ptracer */
1413         EXPECT_EQ(-1, syscall(__NR_getpid));
1414 }
1415 
1416 TEST_F(precedence, trace_is_fourth_in_any_order)
1417 {
1418         pid_t parent;
1419         long ret;
1420 
1421         parent = getppid();
1422         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1423         ASSERT_EQ(0, ret);
1424 
1425         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1426         ASSERT_EQ(0, ret);
1427         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1428         ASSERT_EQ(0, ret);
1429         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1430         ASSERT_EQ(0, ret);
1431         /* Should work just fine. */
1432         EXPECT_EQ(parent, syscall(__NR_getppid));
1433         /* No ptracer */
1434         EXPECT_EQ(-1, syscall(__NR_getpid));
1435 }
1436 
1437 TEST_F(precedence, log_is_fifth)
1438 {
1439         pid_t mypid, parent;
1440         long ret;
1441 
1442         mypid = getpid();
1443         parent = getppid();
1444         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1445         ASSERT_EQ(0, ret);
1446 
1447         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1448         ASSERT_EQ(0, ret);
1449         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1450         ASSERT_EQ(0, ret);
1451         /* Should work just fine. */
1452         EXPECT_EQ(parent, syscall(__NR_getppid));
1453         /* Should also work just fine */
1454         EXPECT_EQ(mypid, syscall(__NR_getpid));
1455 }
1456 
1457 TEST_F(precedence, log_is_fifth_in_any_order)
1458 {
1459         pid_t mypid, parent;
1460         long ret;
1461 
1462         mypid = getpid();
1463         parent = getppid();
1464         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1465         ASSERT_EQ(0, ret);
1466 
1467         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1468         ASSERT_EQ(0, ret);
1469         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1470         ASSERT_EQ(0, ret);
1471         /* Should work just fine. */
1472         EXPECT_EQ(parent, syscall(__NR_getppid));
1473         /* Should also work just fine */
1474         EXPECT_EQ(mypid, syscall(__NR_getpid));
1475 }
1476 
/* Fallback for headers that do not define the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/*
 * Catch the Ubuntu 12.04 value error.
 * If PTRACE_EVENT_SECCOMP is not a preprocessor macro at all, the
 * comparison below sees it as 0, the #undef is a harmless no-op and the
 * fallback definition further down takes effect.
 */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

/* Supply the value this file expects when it is missing or was dropped. */
#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif
1489 
/*
 * True when the event code in the upper bits of a wait() status is
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that any
 * expression can be passed safely.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)

/*
 * Run flag for the tracer loop. It is cleared from signal context by
 * tracer_stop(), so it has to be a volatile sig_atomic_t.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: ask the tracer loop to stop. @sig is not inspected. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = false;
}
1496 
/*
 * Callback run by start_tracer() for each stop it handles.
 * @_metadata: harness state, so the callback can use ASSERT/EXPECT
 * @tracee:    pid of the traced process
 * @status:    wait() status describing the stop
 * @args:      opaque pointer handed through from start_tracer()
 */
typedef void tracer_func_t(struct __test_metadata *_metadata,
			   pid_t tracee, int status, void *args);
1499 
1500 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1501             tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1502 {
1503         int ret = -1;
1504         struct sigaction action = {
1505                 .sa_handler = tracer_stop,
1506         };
1507 
1508         /* Allow external shutdown. */
1509         tracer_running = true;
1510         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1511 
1512         errno = 0;
1513         while (ret == -1 && errno != EINVAL)
1514                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1515         ASSERT_EQ(0, ret) {
1516                 kill(tracee, SIGKILL);
1517         }
1518         /* Wait for attach stop */
1519         wait(NULL);
1520 
1521         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1522                                                       PTRACE_O_TRACESYSGOOD :
1523                                                       PTRACE_O_TRACESECCOMP);
1524         ASSERT_EQ(0, ret) {
1525                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1526                 kill(tracee, SIGKILL);
1527         }
1528         ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1529                      tracee, NULL, 0);
1530         ASSERT_EQ(0, ret);
1531 
1532         /* Unblock the tracee */
1533         ASSERT_EQ(1, write(fd, "A", 1));
1534         ASSERT_EQ(0, close(fd));
1535 
1536         /* Run until we're shut down. Must assert to stop execution. */
1537         while (tracer_running) {
1538                 int status;
1539 
1540                 if (wait(&status) != tracee)
1541                         continue;
1542                 if (WIFSIGNALED(status) || WIFEXITED(status))
1543                         /* Child is dead. Time to go. */
1544                         return;
1545 
1546                 /* Check if this is a seccomp event. */
1547                 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1548 
1549                 tracer_func(_metadata, tracee, status, args);
1550 
1551                 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1552                              tracee, NULL, 0);
1553                 ASSERT_EQ(0, ret);
1554         }
1555         /* Directly report the status of our test harness results. */
1556         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1557 }
1558 
/* Common tracer setup/teardown functions. */

/* Signal handler that intentionally does nothing. */
void cont_handler(int num)
{
	(void)num;
}
1562 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1563                           tracer_func_t func, void *args, bool ptrace_syscall)
1564 {
1565         char sync;
1566         int pipefd[2];
1567         pid_t tracer_pid;
1568         pid_t tracee = getpid();
1569 
1570         /* Setup a pipe for clean synchronization. */
1571         ASSERT_EQ(0, pipe(pipefd));
1572 
1573         /* Fork a child which we'll promote to tracer */
1574         tracer_pid = fork();
1575         ASSERT_LE(0, tracer_pid);
1576         signal(SIGALRM, cont_handler);
1577         if (tracer_pid == 0) {
1578                 close(pipefd[0]);
1579                 start_tracer(_metadata, pipefd[1], tracee, func, args,
1580                              ptrace_syscall);
1581                 syscall(__NR_exit, 0);
1582         }
1583         close(pipefd[1]);
1584         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1585         read(pipefd[0], &sync, 1);
1586         close(pipefd[0]);
1587 
1588         return tracer_pid;
1589 }
1590 
1591 void teardown_trace_fixture(struct __test_metadata *_metadata,
1592                             pid_t tracer)
1593 {
1594         if (tracer) {
1595                 int status;
1596                 /*
1597                  * Extract the exit code from the other process and
1598                  * adopt it for ourselves in case its asserts failed.
1599                  */
1600                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1601                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1602                 if (WEXITSTATUS(status))
1603                         _metadata->passed = 0;
1604         }
1605 }
1606 
/* "poke" tracer arguments and function. */
struct tracer_args_poke_t {
	/* Address in the tracee that tracer_poke() writes to. */
	unsigned long poke_addr;
};
1611 
1612 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1613                  void *args)
1614 {
1615         int ret;
1616         unsigned long msg;
1617         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1618 
1619         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1620         EXPECT_EQ(0, ret);
1621         /* If this fails, don't try to recover. */
1622         ASSERT_EQ(0x1001, msg) {
1623                 kill(tracee, SIGKILL);
1624         }
1625         /*
1626          * Poke in the message.
1627          * Registers are not touched to try to keep this relatively arch
1628          * agnostic.
1629          */
1630         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1631         EXPECT_EQ(0, ret);
1632 }
1633 
/* Per-test state for the TRACE_poke tests. */
FIXTURE(TRACE_poke) {
	/* Filter program; its instruction buffer is heap-allocated in setup. */
	struct sock_fprog prog;
	/* Pid of the tracer process started in setup. */
	pid_t tracer;
	/* Word the tracer overwrites; starts at 0. */
	long poked;
	/* Arguments handed to tracer_poke(). */
	struct tracer_args_poke_t tracer_args;
};
1640 
1641 FIXTURE_SETUP(TRACE_poke)
1642 {
1643         struct sock_filter filter[] = {
1644                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1645                         offsetof(struct seccomp_data, nr)),
1646                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1647                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1648                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1649         };
1650 
1651         self->poked = 0;
1652         memset(&self->prog, 0, sizeof(self->prog));
1653         self->prog.filter = malloc(sizeof(filter));
1654         ASSERT_NE(NULL, self->prog.filter);
1655         memcpy(self->prog.filter, filter, sizeof(filter));
1656         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1657 
1658         /* Set up tracer args. */
1659         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1660 
1661         /* Launch tracer. */
1662         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1663                                            &self->tracer_args, false);
1664 }
1665 
1666 FIXTURE_TEARDOWN(TRACE_poke)
1667 {
1668         teardown_trace_fixture(_metadata, self->tracer);
1669         if (self->prog.filter)
1670                 free(self->prog.filter);
1671 }
1672 
1673 TEST_F(TRACE_poke, read_has_side_effects)
1674 {
1675         ssize_t ret;
1676 
1677         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1678         ASSERT_EQ(0, ret);
1679 
1680         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1681         ASSERT_EQ(0, ret);
1682 
1683         EXPECT_EQ(0, self->poked);
1684         ret = read(-1, NULL, 0);
1685         EXPECT_EQ(-1, ret);
1686         EXPECT_EQ(0x1001, self->poked);
1687 }
1688 
1689 TEST_F(TRACE_poke, getpid_runs_normally)
1690 {
1691         long ret;
1692 
1693         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1694         ASSERT_EQ(0, ret);
1695 
1696         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1697         ASSERT_EQ(0, ret);
1698 
1699         EXPECT_EQ(0, self->poked);
1700         EXPECT_NE(0, syscall(__NR_getpid));
1701         EXPECT_EQ(0, self->poked);
1702 }
1703 
1704 #if defined(__x86_64__)
1705 # define ARCH_REGS              struct user_regs_struct
1706 # define SYSCALL_NUM(_regs)     (_regs).orig_rax
1707 # define SYSCALL_RET(_regs)     (_regs).rax
1708 #elif defined(__i386__)
1709 # define ARCH_REGS              struct user_regs_struct
1710 # define SYSCALL_NUM(_regs)     (_regs).orig_eax
1711 # define SYSCALL_RET(_regs)     (_regs).eax
1712 #elif defined(__arm__)
1713 # define ARCH_REGS              struct pt_regs
1714 # define SYSCALL_NUM(_regs)     (_regs).ARM_r7
1715 # ifndef PTRACE_SET_SYSCALL
1716 #  define PTRACE_SET_SYSCALL   23
1717 # endif
1718 # define SYSCALL_NUM_SET(_regs, _nr)    \
1719                 EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
1720 # define SYSCALL_RET(_regs)     (_regs).ARM_r0
1721 #elif defined(__aarch64__)
1722 # define ARCH_REGS              struct user_pt_regs
1723 # define SYSCALL_NUM(_regs)     (_regs).regs[8]
1724 # ifndef NT_ARM_SYSTEM_CALL
1725 #  define NT_ARM_SYSTEM_CALL 0x404
1726 # endif
1727 # define SYSCALL_NUM_SET(_regs, _nr)                            \
1728         do {                                                    \
1729                 struct iovec __v;                               \
1730                 typeof(_nr) __nr = (_nr);                       \
1731                 __v.iov_base = &__nr;                           \
1732                 __v.iov_len = sizeof(__nr);                     \
1733                 EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee,   \
1734                                     NT_ARM_SYSTEM_CALL, &__v)); \
1735         } while (0)
1736 # define SYSCALL_RET(_regs)     (_regs).regs[0]
1737 #elif defined(__riscv) && __riscv_xlen == 64
1738 # define ARCH_REGS              struct user_regs_struct
1739 # define SYSCALL_NUM(_regs)     (_regs).a7
1740 # define SYSCALL_RET(_regs)     (_regs).a0
1741 #elif defined(__csky__)
1742 # define ARCH_REGS              struct pt_regs
1743 #  if defined(__CSKYABIV2__)
1744 #   define SYSCALL_NUM(_regs)   (_regs).regs[3]
1745 #  else
1746 #   define SYSCALL_NUM(_regs)   (_regs).regs[9]
1747 #  endif
1748 # define SYSCALL_RET(_regs)     (_regs).a0
1749 #elif defined(__hppa__)
1750 # define ARCH_REGS              struct user_regs_struct
1751 # define SYSCALL_NUM(_regs)     (_regs).gr[20]
1752 # define SYSCALL_RET(_regs)     (_regs).gr[28]
1753 #elif defined(__powerpc__)
1754 # define ARCH_REGS              struct pt_regs
1755 # define SYSCALL_NUM(_regs)     (_regs).gpr[0]
1756 # define SYSCALL_RET(_regs)     (_regs).gpr[3]
/*
 * Store _val as the syscall return value in _regs, using the powerpc
 * convention that matches the trap type recorded in _regs:
 *  - scv 0 syscalls signal an error with a negative result, so _val is
 *    stored unmodified;
 *  - otherwise an error is signaled through the CR0 SO bit and the error
 *    code is stored as a positive value.
 * _val is evaluated once. _regs must be an lvalue; it is parenthesized on
 * every use so that expressions such as *ptr work as an argument.
 */
# define SYSCALL_RET_SET(_regs, _val)				\
	do {							\
		typeof(_val) _result = (_val);			\
		if (((_regs).trap & 0xfff0) == 0x3000) {	\
			/*					\
			 * scv 0 system call uses -ve result	\
			 * for error, so no need to adjust.	\
			 */					\
			SYSCALL_RET(_regs) = _result;		\
		} else if (_result < 0) {			\
			/*					\
			 * A syscall error is signaled by the	\
			 * CR0 SO bit and the code is stored as	\
			 * a positive value.			\
			 */					\
			SYSCALL_RET(_regs) = -_result;		\
			(_regs).ccr |= 0x10000000;		\
		} else {					\
			/* Success: clear the CR0 SO bit. */	\
			SYSCALL_RET(_regs) = _result;		\
			(_regs).ccr &= ~0x10000000;		\
		}						\
	} while (0)
1781 # define SYSCALL_RET_SET_ON_PTRACE_EXIT
1782 #elif defined(__s390__)
1783 # define ARCH_REGS              s390_regs
1784 # define SYSCALL_NUM(_regs)     (_regs).gprs[2]
1785 # define SYSCALL_RET_SET(_regs, _val)                   \
1786                 TH_LOG("Can't modify syscall return on this architecture")
1787 #elif defined(__mips__)
1788 # include <asm/unistd_nr_n32.h>
1789 # include <asm/unistd_nr_n64.h>
1790 # include <asm/unistd_nr_o32.h>
1791 # define ARCH_REGS              struct pt_regs
1792 # define SYSCALL_NUM(_regs)                             \
1793         ({                                              \
1794                 typeof((_regs).regs[2]) _nr;            \
1795                 if ((_regs).regs[2] == __NR_O32_Linux)  \
1796                         _nr = (_regs).regs[4];          \
1797                 else                                    \
1798                         _nr = (_regs).regs[2];          \
1799                 _nr;                                    \
1800         })
1801 # define SYSCALL_NUM_SET(_regs, _nr)                    \
1802         do {                                            \
1803                 if ((_regs).regs[2] == __NR_O32_Linux)  \
1804                         (_regs).regs[4] = _nr;          \
1805                 else                                    \
1806                         (_regs).regs[2] = _nr;          \
1807         } while (0)
1808 # define SYSCALL_RET_SET(_regs, _val)                   \
1809                 TH_LOG("Can't modify syscall return on this architecture")
1810 #elif defined(__xtensa__)
1811 # define ARCH_REGS              struct user_pt_regs
1812 # define SYSCALL_NUM(_regs)     (_regs).syscall
1813 /*
1814  * On xtensa syscall return value is in the register
1815  * a2 of the current window which is not fixed.
1816  */
1817 #define SYSCALL_RET(_regs)      (_regs).a[(_regs).windowbase * 4 + 2]
1818 #elif defined(__sh__)
1819 # define ARCH_REGS              struct pt_regs
1820 # define SYSCALL_NUM(_regs)     (_regs).regs[3]
1821 # define SYSCALL_RET(_regs)     (_regs).regs[0]
1822 #else
1823 # error "Do not know how to find your architecture's registers and syscalls"
1824 #endif
1825 
/*
 * Fallback for architectures that did not provide their own
 * SYSCALL_NUM_SET() above: the syscall is changed by assigning the new
 * number straight to the SYSCALL_NUM() register.
 */
#if !defined(SYSCALL_NUM_SET)
# define SYSCALL_NUM_SET(_regs, _nr) \
	do { SYSCALL_NUM(_regs) = (_nr); } while (0)
#endif
1836 /*
1837  * Most architectures can change the syscall return value by just
1838  * writing to the SYSCALL_RET register. This is the default if not
1839  * defined above. If an architecture cannot set the return value
1840  * (for example when the syscall and return value register is
1841  * shared), report it with TH_LOG() in an arch-specific definition
1842  * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
1843  */
1844 #if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
1845 # error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
1846 #endif
/* Default: setting the return value is a plain store to SYSCALL_RET(). */
#if !defined(SYSCALL_RET_SET)
# define SYSCALL_RET_SET(_regs, _val) \
	do { SYSCALL_RET(_regs) = (_val); } while (0)
#endif
1853 
/*
 * EXPECT_SYSCALL_RETURN(val, action): evaluate action (a syscall whose
 * result the tracer is expected to have replaced with val) and check it.
 *
 * When the architecture cannot change the syscall return value
 * (SYSCALL_RET is undefined), the check is stubbed out to only expect a
 * -1 result. Otherwise a negative val is expected to surface as a -1
 * result with errno set to -val, and any other val is expected to be
 * returned unchanged.
 *
 * val is expanded more than once, so it must be free of side effects; it
 * is parenthesized in the sign test so that an argument built from
 * low-precedence operators is compared as a whole.
 */
#ifndef SYSCALL_RET
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif
1869 
/*
 * Which ptrace stop (entry == true, exit == false) is used to rewrite
 * the syscall number and the syscall return value. The number is always
 * set on entry. The return value is set on entry too, unless the
 * architecture defined SYSCALL_RET_SET_ON_PTRACE_EXIT (e.g. powerpc),
 * in which case it can only be set on syscall exit.
 */
const bool ptrace_entry_set_syscall_nr = true;
#ifdef SYSCALL_RET_SET_ON_PTRACE_EXIT
const bool ptrace_entry_set_syscall_ret = false;
#else
const bool ptrace_entry_set_syscall_ret = true;
#endif
1881 
/*
 * Register transfer helpers. Both expect a pid_t named "tracee" in the
 * calling scope and evaluate to the result of the ptrace() call.
 *
 * x86 and mips use PTRACE_GETREGS and PTRACE_SETREGS, which is useful
 * for architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 * Every other architecture goes through the NT_PRSTATUS register set,
 * described by an iovec covering the whole _regs object.
 */
#if !(defined(__x86_64__) || defined(__i386__) || defined(__mips__))
# define ARCH_GETREGS(_regs)	({					\
		struct iovec __iov = {					\
			.iov_base = &(_regs),				\
			.iov_len = sizeof(_regs),			\
		};							\
		ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__iov);	\
	})
# define ARCH_SETREGS(_regs)	({					\
		struct iovec __iov = {					\
			.iov_base = &(_regs),				\
			.iov_len = sizeof(_regs),			\
		};							\
		ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__iov);	\
	})
#else
# define ARCH_GETREGS(_regs)	ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
# define ARCH_SETREGS(_regs)	ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
#endif
1903 
1904 /* Architecture-specific syscall fetching routine. */
1905 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1906 {
1907         ARCH_REGS regs;
1908 
1909         EXPECT_EQ(0, ARCH_GETREGS(regs)) {
1910                 return -1;
1911         }
1912 
1913         return SYSCALL_NUM(regs);
1914 }
1915 
1916 /* Architecture-specific syscall changing routine. */
1917 void __change_syscall(struct __test_metadata *_metadata,
1918                     pid_t tracee, long *syscall, long *ret)
1919 {
1920         ARCH_REGS orig, regs;
1921 
1922         /* Do not get/set registers if we have nothing to do. */
1923         if (!syscall && !ret)
1924                 return;
1925 
1926         EXPECT_EQ(0, ARCH_GETREGS(regs)) {
1927                 return;
1928         }
1929         orig = regs;
1930 
1931         if (syscall)
1932                 SYSCALL_NUM_SET(regs, *syscall);
1933 
1934         if (ret)
1935                 SYSCALL_RET_SET(regs, *ret);
1936 
1937         /* Flush any register changes made. */
1938         if (memcmp(&orig, &regs, sizeof(orig)) != 0)
1939                 EXPECT_EQ(0, ARCH_SETREGS(regs));
1940 }
1941 
1942 /* Change only syscall number. */
1943 void change_syscall_nr(struct __test_metadata *_metadata,
1944                        pid_t tracee, long syscall)
1945 {
1946         __change_syscall(_metadata, tracee, &syscall, NULL);
1947 }
1948 
/*
 * Replace the tracee's syscall return value with ret; the syscall number
 * is set to -1 in the same operation.
 */
void change_syscall_ret(struct __test_metadata *_metadata,
			pid_t tracee, long ret)
{
	long skip_nr = -1;

	__change_syscall(_metadata, tracee, &skip_nr, &ret);
}
1957 
1958 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
1959                     int status, void *args)
1960 {
1961         int ret;
1962         unsigned long msg;
1963 
1964         /* Make sure we got the right message. */
1965         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1966         EXPECT_EQ(0, ret);
1967 
1968         /* Validate and take action on expected syscalls. */
1969         switch (msg) {
1970         case 0x1002:
1971                 /* change getpid to getppid. */
1972                 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1973                 change_syscall_nr(_metadata, tracee, __NR_getppid);
1974                 break;
1975         case 0x1003:
1976                 /* skip gettid with valid return code. */
1977                 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1978                 change_syscall_ret(_metadata, tracee, 45000);
1979                 break;
1980         case 0x1004:
1981                 /* skip openat with error. */
1982                 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1983                 change_syscall_ret(_metadata, tracee, -ESRCH);
1984                 break;
1985         case 0x1005:
1986                 /* do nothing (allow getppid) */
1987                 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1988                 break;
1989         default:
1990                 EXPECT_EQ(0, msg) {
1991                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1992                         kill(tracee, SIGKILL);
1993                 }
1994         }
1995 
1996 }
1997 
1998 FIXTURE(TRACE_syscall) {
1999         struct sock_fprog prog;
2000         pid_t tracer, mytid, mypid, parent;
2001         long syscall_nr;
2002 };
2003 
2004 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
2005                    int status, void *args)
2006 {
2007         int ret;
2008         unsigned long msg;
2009         static bool entry;
2010         long syscall_nr_val, syscall_ret_val;
2011         long *syscall_nr = NULL, *syscall_ret = NULL;
2012         FIXTURE_DATA(TRACE_syscall) *self = args;
2013 
2014         /*
2015          * The traditional way to tell PTRACE_SYSCALL entry/exit
2016          * is by counting.
2017          */
2018         entry = !entry;
2019 
2020         /* Make sure we got an appropriate message. */
2021         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
2022         EXPECT_EQ(0, ret);
2023         EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
2024                         : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
2025 
2026         /*
2027          * Some architectures only support setting return values during
2028          * syscall exit under ptrace, and on exit the syscall number may
2029          * no longer be available. Therefore, save the initial sycall
2030          * number here, so it can be examined during both entry and exit
2031          * phases.
2032          */
2033         if (entry)
2034                 self->syscall_nr = get_syscall(_metadata, tracee);
2035 
2036         /*
2037          * Depending on the architecture's syscall setting abilities, we
2038          * pick which things to set during this phase (entry or exit).
2039          */
2040         if (entry == ptrace_entry_set_syscall_nr)
2041                 syscall_nr = &syscall_nr_val;
2042         if (entry == ptrace_entry_set_syscall_ret)
2043                 syscall_ret = &syscall_ret_val;
2044 
2045         /* Now handle the actual rewriting cases. */
2046         switch (self->syscall_nr) {
2047         case __NR_getpid:
2048                 syscall_nr_val = __NR_getppid;
2049                 /* Never change syscall return for this case. */
2050                 syscall_ret = NULL;
2051                 break;
2052         case __NR_gettid:
2053                 syscall_nr_val = -1;
2054                 syscall_ret_val = 45000;
2055                 break;
2056         case __NR_openat:
2057                 syscall_nr_val = -1;
2058                 syscall_ret_val = -ESRCH;
2059                 break;
2060         default:
2061                 /* Unhandled, do nothing. */
2062                 return;
2063         }
2064 
2065         __change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
2066 }
2067 
/*
 * Every SECCOMP_RET_TRACE behavior can be exercised in two ways: with
 * SECCOMP_RET_TRACE+PTRACE_CONT, or with plain ptrace()+PTRACE_SYSCALL.
 * The variant selects which one a test run uses.
 */
FIXTURE_VARIANT(TRACE_syscall) {
	/* true: plain ptrace; false: SECCOMP_RET_TRACE */
	bool use_ptrace;
};
2077 
2078 FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
2079         .use_ptrace = true,
2080 };
2081 
2082 FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
2083         .use_ptrace = false,
2084 };
2085 
2086 FIXTURE_SETUP(TRACE_syscall)
2087 {
2088         struct sock_filter filter[] = {
2089                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2090                         offsetof(struct seccomp_data, nr)),
2091                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2092                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
2093                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
2094                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
2095                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
2096                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
2097                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2098                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
2099                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2100         };
2101         struct sock_fprog prog = {
2102                 .len = (unsigned short)ARRAY_SIZE(filter),
2103                 .filter = filter,
2104         };
2105         long ret;
2106 
2107         /* Prepare some testable syscall results. */
2108         self->mytid = syscall(__NR_gettid);
2109         ASSERT_GT(self->mytid, 0);
2110         ASSERT_NE(self->mytid, 1) {
2111                 TH_LOG("Running this test as init is not supported. :)");
2112         }
2113 
2114         self->mypid = getpid();
2115         ASSERT_GT(self->mypid, 0);
2116         ASSERT_EQ(self->mytid, self->mypid);
2117 
2118         self->parent = getppid();
2119         ASSERT_GT(self->parent, 0);
2120         ASSERT_NE(self->parent, self->mypid);
2121 
2122         /* Launch tracer. */
2123         self->tracer = setup_trace_fixture(_metadata,
2124                                            variant->use_ptrace ? tracer_ptrace
2125                                                                : tracer_seccomp,
2126                                            self, variant->use_ptrace);
2127 
2128         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2129         ASSERT_EQ(0, ret);
2130 
2131         if (variant->use_ptrace)
2132                 return;
2133 
2134         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2135         ASSERT_EQ(0, ret);
2136 }
2137 
2138 FIXTURE_TEARDOWN(TRACE_syscall)
2139 {
2140         teardown_trace_fixture(_metadata, self->tracer);
2141 }
2142 
/* Negative syscall numbers must fail with -1/ENOSYS. */
TEST(negative_ENOSYS)
{
	/*
	 * Userspace asking for syscall "-1" must look exactly like an
	 * "internal" skip.
	 */
	errno = 0;
	EXPECT_EQ(-1, syscall(-1));
	EXPECT_EQ(errno, ENOSYS);

	/* The same holds for other invalid numbers that are not -1. */
	errno = 0;
	EXPECT_EQ(-1, syscall(-101));
	EXPECT_EQ(errno, ENOSYS);
}
2157 
2158 TEST_F(TRACE_syscall, negative_ENOSYS)
2159 {
2160         negative_ENOSYS(_metadata);
2161 }
2162 
2163 TEST_F(TRACE_syscall, syscall_allowed)
2164 {
2165         /* getppid works as expected (no changes). */
2166         EXPECT_EQ(self->parent, syscall(__NR_getppid));
2167         EXPECT_NE(self->mypid, syscall(__NR_getppid));
2168 }
2169 
2170 TEST_F(TRACE_syscall, syscall_redirected)
2171 {
2172         /* getpid has been redirected to getppid as expected. */
2173         EXPECT_EQ(self->parent, syscall(__NR_getpid));
2174         EXPECT_NE(self->mypid, syscall(__NR_getpid));
2175 }
2176 
/* A skipped syscall can be made to fail with a tracer-chosen errno. */
TEST_F(TRACE_syscall, syscall_errno)
{
	/* The tracer skips openat and reports ESRCH instead. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
2182 
/* A skipped syscall can be given a tracer-chosen successful result. */
TEST_F(TRACE_syscall, syscall_faked)
{
	/* The tracer skips gettid and stores 45000 as its return value. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
2188 
2189 TEST_F(TRACE_syscall, skip_after)
2190 {
2191         struct sock_filter filter[] = {
2192                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2193                         offsetof(struct seccomp_data, nr)),
2194                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2195                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2196                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2197         };
2198         struct sock_fprog prog = {
2199                 .len = (unsigned short)ARRAY_SIZE(filter),
2200                 .filter = filter,
2201         };
2202         long ret;
2203 
2204         /* Install additional "errno on getppid" filter. */
2205         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2206         ASSERT_EQ(0, ret);
2207 
2208         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
2209         errno = 0;
2210         EXPECT_EQ(-1, syscall(__NR_getpid));
2211         EXPECT_EQ(EPERM, errno);
2212 }
2213 
2214 TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
2215 {
2216         struct sock_filter filter[] = {
2217                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2218                         offsetof(struct seccomp_data, nr)),
2219                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2220                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2221                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2222         };
2223         struct sock_fprog prog = {
2224                 .len = (unsigned short)ARRAY_SIZE(filter),
2225                 .filter = filter,
2226         };
2227         long ret;
2228 
2229         /* Install additional "death on getppid" filter. */
2230         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2231         ASSERT_EQ(0, ret);
2232 
2233         /* Tracer will redirect getpid to getppid, and we should die. */
2234         EXPECT_NE(self->mypid, syscall(__NR_getpid));
2235 }
2236 
2237 TEST(seccomp_syscall)
2238 {
2239         struct sock_filter filter[] = {
2240                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2241         };
2242         struct sock_fprog prog = {
2243                 .len = (unsigned short)ARRAY_SIZE(filter),
2244                 .filter = filter,
2245         };
2246         long ret;
2247 
2248         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2249         ASSERT_EQ(0, ret) {
2250                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2251         }
2252 
2253         /* Reject insane operation. */
2254         ret = seccomp(-1, 0, &prog);
2255         ASSERT_NE(ENOSYS, errno) {
2256                 TH_LOG("Kernel does not support seccomp syscall!");
2257         }
2258         EXPECT_EQ(EINVAL, errno) {
2259                 TH_LOG("Did not reject crazy op value!");
2260         }
2261 
2262         /* Reject strict with flags or pointer. */
2263         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2264         EXPECT_EQ(EINVAL, errno) {
2265                 TH_LOG("Did not reject mode strict with flags!");
2266         }
2267         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2268         EXPECT_EQ(EINVAL, errno) {
2269                 TH_LOG("Did not reject mode strict with uargs!");
2270         }
2271 
2272         /* Reject insane args for filter. */
2273         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2274         EXPECT_EQ(EINVAL, errno) {
2275                 TH_LOG("Did not reject crazy filter flags!");
2276         }
2277         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2278         EXPECT_EQ(EFAULT, errno) {
2279                 TH_LOG("Did not reject NULL filter!");
2280         }
2281 
2282         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2283         EXPECT_EQ(0, errno) {
2284                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2285                         strerror(errno));
2286         }
2287 }
2288 
2289 TEST(seccomp_syscall_mode_lock)
2290 {
2291         struct sock_filter filter[] = {
2292                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2293         };
2294         struct sock_fprog prog = {
2295                 .len = (unsigned short)ARRAY_SIZE(filter),
2296                 .filter = filter,
2297         };
2298         long ret;
2299 
2300         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2301         ASSERT_EQ(0, ret) {
2302                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2303         }
2304 
2305         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2306         ASSERT_NE(ENOSYS, errno) {
2307                 TH_LOG("Kernel does not support seccomp syscall!");
2308         }
2309         EXPECT_EQ(0, ret) {
2310                 TH_LOG("Could not install filter!");
2311         }
2312 
2313         /* Make sure neither entry point will switch to strict. */
2314         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2315         EXPECT_EQ(EINVAL, errno) {
2316                 TH_LOG("Switched to mode strict!");
2317         }
2318 
2319         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2320         EXPECT_EQ(EINVAL, errno) {
2321                 TH_LOG("Switched to mode strict!");
2322         }
2323 }
2324 
2325 /*
2326  * Test detection of known and unknown filter flags. Userspace needs to be able
2327  * to check if a filter flag is supported by the current kernel and a good way
2328  * of doing that is by attempting to enter filter mode, with the flag bit in
2329  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2330  * that the flag is valid and EINVAL indicates that the flag is invalid.
2331  */
2332 TEST(detect_seccomp_filter_flags)
2333 {
2334         unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2335                                  SECCOMP_FILTER_FLAG_LOG,
2336                                  SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2337                                  SECCOMP_FILTER_FLAG_NEW_LISTENER,
2338                                  SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
2339         unsigned int exclusive[] = {
2340                                 SECCOMP_FILTER_FLAG_TSYNC,
2341                                 SECCOMP_FILTER_FLAG_NEW_LISTENER };
2342         unsigned int flag, all_flags, exclusive_mask;
2343         int i;
2344         long ret;
2345 
2346         /* Test detection of individual known-good filter flags */
2347         for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2348                 int bits = 0;
2349 
2350                 flag = flags[i];
2351                 /* Make sure the flag is a single bit! */
2352                 while (flag) {
2353                         if (flag & 0x1)
2354                                 bits ++;
2355                         flag >>= 1;
2356                 }
2357                 ASSERT_EQ(1, bits);
2358                 flag = flags[i];
2359 
2360                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2361                 ASSERT_NE(ENOSYS, errno) {
2362                         TH_LOG("Kernel does not support seccomp syscall!");
2363                 }
2364                 EXPECT_EQ(-1, ret);
2365                 EXPECT_EQ(EFAULT, errno) {
2366                         TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2367                                flag);
2368                 }
2369 
2370                 all_flags |= flag;
2371         }
2372 
2373         /*
2374          * Test detection of all known-good filter flags combined. But
2375          * for the exclusive flags we need to mask them out and try them
2376          * individually for the "all flags" testing.
2377          */
2378         exclusive_mask = 0;
2379         for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2380                 exclusive_mask |= exclusive[i];
2381         for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2382                 flag = all_flags & ~exclusive_mask;
2383                 flag |= exclusive[i];
2384 
2385                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2386                 EXPECT_EQ(-1, ret);
2387                 EXPECT_EQ(EFAULT, errno) {
2388                         TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2389                                flag);
2390                 }
2391         }
2392 
2393         /* Test detection of an unknown filter flags, without exclusives. */
2394         flag = -1;
2395         flag &= ~exclusive_mask;
2396         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2397         EXPECT_EQ(-1, ret);
2398         EXPECT_EQ(EINVAL, errno) {
2399                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2400                        flag);
2401         }
2402 
2403         /*
2404          * Test detection of an unknown filter flag that may simply need to be
2405          * added to this test
2406          */
2407         flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2408         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2409         EXPECT_EQ(-1, ret);
2410         EXPECT_EQ(EINVAL, errno) {
2411                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2412                        flag);
2413         }
2414 }
2415 
2416 TEST(TSYNC_first)
2417 {
2418         struct sock_filter filter[] = {
2419                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2420         };
2421         struct sock_fprog prog = {
2422                 .len = (unsigned short)ARRAY_SIZE(filter),
2423                 .filter = filter,
2424         };
2425         long ret;
2426 
2427         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2428         ASSERT_EQ(0, ret) {
2429                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2430         }
2431 
2432         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2433                       &prog);
2434         ASSERT_NE(ENOSYS, errno) {
2435                 TH_LOG("Kernel does not support seccomp syscall!");
2436         }
2437         EXPECT_EQ(0, ret) {
2438                 TH_LOG("Could not install initial filter with TSYNC!");
2439         }
2440 }
2441 
/* Number of sibling threads the TSYNC fixture manages. */
#define TSYNC_SIBLINGS 2

/* State of one sibling thread, shared with the thread that created it. */
struct tsync_sibling {
	/* pthread handle; 0 while not started or once joined */
	pthread_t tid;
	/* kernel thread id, stored by the sibling itself when it starts */
	pid_t system_tid;
	/* posted by the sibling once it is up */
	sem_t *started;
	/* condition variable and mutex the sibling waits on */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* non-zero: install ->prog in the sibling before it starts waiting */
	int diverge;
	/* number of condition wake-ups to consume before continuing */
	int num_waits;
	/* filter program installed when ->diverge is set */
	struct sock_fprog *prog;
	struct __test_metadata *metadata;
};
2454 
/*
 * Join a thread and, on success, clear its tid so that fixture teardown
 * does not try to deal with it again (joining an already joined thread
 * is not allowed by Bionic). A failed join is only logged; whatever
 * threads remain are killed directly during teardown.
 *
 * tid must be an lvalue, because it is assigned 0 on success.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _join_err = pthread_join(tid, status);		\
		if (!_join_err) {					\
			tid = 0;					\
		} else {						\
			TH_LOG("pthread_join of tid %u failed: %d\n",	\
				(unsigned int)tid, _join_err);		\
		}							\
	} while (0)
2471 
2472 FIXTURE(TSYNC) {
2473         struct sock_fprog root_prog, apply_prog;
2474         struct tsync_sibling sibling[TSYNC_SIBLINGS];
2475         sem_t started;
2476         pthread_cond_t cond;
2477         pthread_mutex_t mutex;
2478         int sibling_count;
2479 };
2480 
2481 FIXTURE_SETUP(TSYNC)
2482 {
2483         struct sock_filter root_filter[] = {
2484                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2485         };
2486         struct sock_filter apply_filter[] = {
2487                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2488                         offsetof(struct seccomp_data, nr)),
2489                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2490                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2491                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2492         };
2493 
2494         memset(&self->root_prog, 0, sizeof(self->root_prog));
2495         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2496         memset(&self->sibling, 0, sizeof(self->sibling));
2497         self->root_prog.filter = malloc(sizeof(root_filter));
2498         ASSERT_NE(NULL, self->root_prog.filter);
2499         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2500         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2501 
2502         self->apply_prog.filter = malloc(sizeof(apply_filter));
2503         ASSERT_NE(NULL, self->apply_prog.filter);
2504         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2505         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2506 
2507         self->sibling_count = 0;
2508         pthread_mutex_init(&self->mutex, NULL);
2509         pthread_cond_init(&self->cond, NULL);
2510         sem_init(&self->started, 0, 0);
2511         self->sibling[0].tid = 0;
2512         self->sibling[0].cond = &self->cond;
2513         self->sibling[0].started = &self->started;
2514         self->sibling[0].mutex = &self->mutex;
2515         self->sibling[0].diverge = 0;
2516         self->sibling[0].num_waits = 1;
2517         self->sibling[0].prog = &self->root_prog;
2518         self->sibling[0].metadata = _metadata;
2519         self->sibling[1].tid = 0;
2520         self->sibling[1].cond = &self->cond;
2521         self->sibling[1].started = &self->started;
2522         self->sibling[1].mutex = &self->mutex;
2523         self->sibling[1].diverge = 0;
2524         self->sibling[1].prog = &self->root_prog;
2525         self->sibling[1].num_waits = 1;
2526         self->sibling[1].metadata = _metadata;
2527 }
2528 
2529 FIXTURE_TEARDOWN(TSYNC)
2530 {
2531         int sib = 0;
2532 
2533         if (self->root_prog.filter)
2534                 free(self->root_prog.filter);
2535         if (self->apply_prog.filter)
2536                 free(self->apply_prog.filter);
2537 
2538         for ( ; sib < self->sibling_count; ++sib) {
2539                 struct tsync_sibling *s = &self->sibling[sib];
2540 
2541                 if (!s->tid)
2542                         continue;
2543                 /*
2544                  * If a thread is still running, it may be stuck, so hit
2545                  * it over the head really hard.
2546                  */
2547                 pthread_kill(s->tid, 9);
2548         }
2549         pthread_mutex_destroy(&self->mutex);
2550         pthread_cond_destroy(&self->cond);
2551         sem_destroy(&self->started);
2552 }
2553 
2554 void *tsync_sibling(void *data)
2555 {
2556         long ret = 0;
2557         struct tsync_sibling *me = data;
2558 
2559         me->system_tid = syscall(__NR_gettid);
2560 
2561         pthread_mutex_lock(me->mutex);
2562         if (me->diverge) {
2563                 /* Just re-apply the root prog to fork the tree */
2564                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2565                                 me->prog, 0, 0);
2566         }
2567         sem_post(me->started);
2568         /* Return outside of started so parent notices failures. */
2569         if (ret) {
2570                 pthread_mutex_unlock(me->mutex);
2571                 return (void *)SIBLING_EXIT_FAILURE;
2572         }
2573         do {
2574                 pthread_cond_wait(me->cond, me->mutex);
2575                 me->num_waits = me->num_waits - 1;
2576         } while (me->num_waits);
2577         pthread_mutex_unlock(me->mutex);
2578 
2579         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2580         if (!ret)
2581                 return (void *)SIBLING_EXIT_NEWPRIVS;
2582         read(0, NULL, 0);
2583         return (void *)SIBLING_EXIT_UNKILLED;
2584 }
2585 
2586 void tsync_start_sibling(struct tsync_sibling *sibling)
2587 {
2588         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2589 }
2590 
2591 TEST_F(TSYNC, siblings_fail_prctl)
2592 {
2593         long ret;
2594         void *status;
2595         struct sock_filter filter[] = {
2596                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2597                         offsetof(struct seccomp_data, nr)),
2598                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2599                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2600                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2601         };
2602         struct sock_fprog prog = {
2603                 .len = (unsigned short)ARRAY_SIZE(filter),
2604                 .filter = filter,
2605         };
2606 
2607         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2608                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2609         }
2610 
2611         /* Check prctl failure detection by requesting sib 0 diverge. */
2612         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2613         ASSERT_NE(ENOSYS, errno) {
2614                 TH_LOG("Kernel does not support seccomp syscall!");
2615         }
2616         ASSERT_EQ(0, ret) {
2617                 TH_LOG("setting filter failed");
2618         }
2619 
2620         self->sibling[0].diverge = 1;
2621         tsync_start_sibling(&self->sibling[0]);
2622         tsync_start_sibling(&self->sibling[1]);
2623 
2624         while (self->sibling_count < TSYNC_SIBLINGS) {
2625                 sem_wait(&self->started);
2626                 self->sibling_count++;
2627         }
2628 
2629         /* Signal the threads to clean up*/
2630         pthread_mutex_lock(&self->mutex);
2631         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2632                 TH_LOG("cond broadcast non-zero");
2633         }
2634         pthread_mutex_unlock(&self->mutex);
2635 
2636         /* Ensure diverging sibling failed to call prctl. */
2637         PTHREAD_JOIN(self->sibling[0].tid, &status);
2638         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2639         PTHREAD_JOIN(self->sibling[1].tid, &status);
2640         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2641 }
2642 
2643 TEST_F(TSYNC, two_siblings_with_ancestor)
2644 {
2645         long ret;
2646         void *status;
2647 
2648         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2649                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2650         }
2651 
2652         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2653         ASSERT_NE(ENOSYS, errno) {
2654                 TH_LOG("Kernel does not support seccomp syscall!");
2655         }
2656         ASSERT_EQ(0, ret) {
2657                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2658         }
2659         tsync_start_sibling(&self->sibling[0]);
2660         tsync_start_sibling(&self->sibling[1]);
2661 
2662         while (self->sibling_count < TSYNC_SIBLINGS) {
2663                 sem_wait(&self->started);
2664                 self->sibling_count++;
2665         }
2666 
2667         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2668                       &self->apply_prog);
2669         ASSERT_EQ(0, ret) {
2670                 TH_LOG("Could install filter on all threads!");
2671         }
2672         /* Tell the siblings to test the policy */
2673         pthread_mutex_lock(&self->mutex);
2674         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2675                 TH_LOG("cond broadcast non-zero");
2676         }
2677         pthread_mutex_unlock(&self->mutex);
2678         /* Ensure they are both killed and don't exit cleanly. */
2679         PTHREAD_JOIN(self->sibling[0].tid, &status);
2680         EXPECT_EQ(0x0, (long)status);
2681         PTHREAD_JOIN(self->sibling[1].tid, &status);
2682         EXPECT_EQ(0x0, (long)status);
2683 }
2684 
2685 TEST_F(TSYNC, two_sibling_want_nnp)
2686 {
2687         void *status;
2688 
2689         /* start siblings before any prctl() operations */
2690         tsync_start_sibling(&self->sibling[0]);
2691         tsync_start_sibling(&self->sibling[1]);
2692         while (self->sibling_count < TSYNC_SIBLINGS) {
2693                 sem_wait(&self->started);
2694                 self->sibling_count++;
2695         }
2696 
2697         /* Tell the siblings to test no policy */
2698         pthread_mutex_lock(&self->mutex);
2699         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2700                 TH_LOG("cond broadcast non-zero");
2701         }
2702         pthread_mutex_unlock(&self->mutex);
2703 
2704         /* Ensure they are both upset about lacking nnp. */
2705         PTHREAD_JOIN(self->sibling[0].tid, &status);
2706         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2707         PTHREAD_JOIN(self->sibling[1].tid, &status);
2708         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2709 }
2710 
2711 TEST_F(TSYNC, two_siblings_with_no_filter)
2712 {
2713         long ret;
2714         void *status;
2715 
2716         /* start siblings before any prctl() operations */
2717         tsync_start_sibling(&self->sibling[0]);
2718         tsync_start_sibling(&self->sibling[1]);
2719         while (self->sibling_count < TSYNC_SIBLINGS) {
2720                 sem_wait(&self->started);
2721                 self->sibling_count++;
2722         }
2723 
2724         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2725                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2726         }
2727 
2728         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2729                       &self->apply_prog);
2730         ASSERT_NE(ENOSYS, errno) {
2731                 TH_LOG("Kernel does not support seccomp syscall!");
2732         }
2733         ASSERT_EQ(0, ret) {
2734                 TH_LOG("Could install filter on all threads!");
2735         }
2736 
2737         /* Tell the siblings to test the policy */
2738         pthread_mutex_lock(&self->mutex);
2739         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2740                 TH_LOG("cond broadcast non-zero");
2741         }
2742         pthread_mutex_unlock(&self->mutex);
2743 
2744         /* Ensure they are both killed and don't exit cleanly. */
2745         PTHREAD_JOIN(self->sibling[0].tid, &status);
2746         EXPECT_EQ(0x0, (long)status);
2747         PTHREAD_JOIN(self->sibling[1].tid, &status);
2748         EXPECT_EQ(0x0, (long)status);
2749 }
2750 
2751 TEST_F(TSYNC, two_siblings_with_one_divergence)
2752 {
2753         long ret;
2754         void *status;
2755 
2756         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2757                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2758         }
2759 
2760         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2761         ASSERT_NE(ENOSYS, errno) {
2762                 TH_LOG("Kernel does not support seccomp syscall!");
2763         }
2764         ASSERT_EQ(0, ret) {
2765                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2766         }
2767         self->sibling[0].diverge = 1;
2768         tsync_start_sibling(&self->sibling[0]);
2769         tsync_start_sibling(&self->sibling[1]);
2770 
2771         while (self->sibling_count < TSYNC_SIBLINGS) {
2772                 sem_wait(&self->started);
2773                 self->sibling_count++;
2774         }
2775 
2776         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2777                       &self->apply_prog);
2778         ASSERT_EQ(self->sibling[0].system_tid, ret) {
2779                 TH_LOG("Did not fail on diverged sibling.");
2780         }
2781 
2782         /* Wake the threads */
2783         pthread_mutex_lock(&self->mutex);
2784         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2785                 TH_LOG("cond broadcast non-zero");
2786         }
2787         pthread_mutex_unlock(&self->mutex);
2788 
2789         /* Ensure they are both unkilled. */
2790         PTHREAD_JOIN(self->sibling[0].tid, &status);
2791         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2792         PTHREAD_JOIN(self->sibling[1].tid, &status);
2793         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2794 }
2795 
2796 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
2797 {
2798         long ret, flags;
2799         void *status;
2800 
2801         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2802                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2803         }
2804 
2805         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2806         ASSERT_NE(ENOSYS, errno) {
2807                 TH_LOG("Kernel does not support seccomp syscall!");
2808         }
2809         ASSERT_EQ(0, ret) {
2810                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2811         }
2812         self->sibling[0].diverge = 1;
2813         tsync_start_sibling(&self->sibling[0]);
2814         tsync_start_sibling(&self->sibling[1]);
2815 
2816         while (self->sibling_count < TSYNC_SIBLINGS) {
2817                 sem_wait(&self->started);
2818                 self->sibling_count++;
2819         }
2820 
2821         flags = SECCOMP_FILTER_FLAG_TSYNC | \
2822                 SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
2823         ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
2824         ASSERT_EQ(ESRCH, errno) {
2825                 TH_LOG("Did not return ESRCH for diverged sibling.");
2826         }
2827         ASSERT_EQ(-1, ret) {
2828                 TH_LOG("Did not fail on diverged sibling.");
2829         }
2830 
2831         /* Wake the threads */
2832         pthread_mutex_lock(&self->mutex);
2833         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2834                 TH_LOG("cond broadcast non-zero");
2835         }
2836         pthread_mutex_unlock(&self->mutex);
2837 
2838         /* Ensure they are both unkilled. */
2839         PTHREAD_JOIN(self->sibling[0].tid, &status);
2840         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2841         PTHREAD_JOIN(self->sibling[1].tid, &status);
2842         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2843 }
2844 
2845 TEST_F(TSYNC, two_siblings_not_under_filter)
2846 {
2847         long ret, sib;
2848         void *status;
2849         struct timespec delay = { .tv_nsec = 100000000 };
2850 
2851         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2852                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2853         }
2854 
2855         /*
2856          * Sibling 0 will have its own seccomp policy
2857          * and Sibling 1 will not be under seccomp at
2858          * all. Sibling 1 will enter seccomp and 0
2859          * will cause failure.
2860          */
2861         self->sibling[0].diverge = 1;
2862         tsync_start_sibling(&self->sibling[0]);
2863         tsync_start_sibling(&self->sibling[1]);
2864 
2865         while (self->sibling_count < TSYNC_SIBLINGS) {
2866                 sem_wait(&self->started);
2867                 self->sibling_count++;
2868         }
2869 
2870         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2871         ASSERT_NE(ENOSYS, errno) {
2872                 TH_LOG("Kernel does not support seccomp syscall!");
2873         }
2874         ASSERT_EQ(0, ret) {
2875                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2876         }
2877 
2878         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2879                       &self->apply_prog);
2880         ASSERT_EQ(ret, self->sibling[0].system_tid) {
2881                 TH_LOG("Did not fail on diverged sibling.");
2882         }
2883         sib = 1;
2884         if (ret == self->sibling[0].system_tid)
2885                 sib = 0;
2886 
2887         pthread_mutex_lock(&self->mutex);
2888 
2889         /* Increment the other siblings num_waits so we can clean up
2890          * the one we just saw.
2891          */
2892         self->sibling[!sib].num_waits += 1;
2893 
2894         /* Signal the thread to clean up*/
2895         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2896                 TH_LOG("cond broadcast non-zero");
2897         }
2898         pthread_mutex_unlock(&self->mutex);
2899         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2900         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2901         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2902         while (!kill(self->sibling[sib].system_tid, 0))
2903                 nanosleep(&delay, NULL);
2904         /* Switch to the remaining sibling */
2905         sib = !sib;
2906 
2907         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2908                       &self->apply_prog);
2909         ASSERT_EQ(0, ret) {
2910                 TH_LOG("Expected the remaining sibling to sync");
2911         };
2912 
2913         pthread_mutex_lock(&self->mutex);
2914 
2915         /* If remaining sibling didn't have a chance to wake up during
2916          * the first broadcast, manually reduce the num_waits now.
2917          */
2918         if (self->sibling[sib].num_waits > 1)
2919                 self->sibling[sib].num_waits = 1;
2920         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2921                 TH_LOG("cond broadcast non-zero");
2922         }
2923         pthread_mutex_unlock(&self->mutex);
2924         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2925         EXPECT_EQ(0, (long)status);
2926         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2927         while (!kill(self->sibling[sib].system_tid, 0))
2928                 nanosleep(&delay, NULL);
2929 
2930         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2931                       &self->apply_prog);
2932         ASSERT_EQ(0, ret);  /* just us chickens */
2933 }
2934 
2935 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
2936 TEST(syscall_restart)
2937 {
2938         long ret;
2939         unsigned long msg;
2940         pid_t child_pid;
2941         int pipefd[2];
2942         int status;
2943         siginfo_t info = { };
2944         struct sock_filter filter[] = {
2945                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2946                          offsetof(struct seccomp_data, nr)),
2947 
2948 #ifdef __NR_sigreturn
2949                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
2950 #endif
2951                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
2952                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
2953                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
2954                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
2955                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
2956                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2957 
2958                 /* Allow __NR_write for easy logging. */
2959                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2960                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2961                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2962                 /* The nanosleep jump target. */
2963                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2964                 /* The restart_syscall jump target. */
2965                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2966         };
2967         struct sock_fprog prog = {
2968                 .len = (unsigned short)ARRAY_SIZE(filter),
2969                 .filter = filter,
2970         };
2971 #if defined(__arm__)
2972         struct utsname utsbuf;
2973 #endif
2974 
2975         ASSERT_EQ(0, pipe(pipefd));
2976 
2977         child_pid = fork();
2978         ASSERT_LE(0, child_pid);
2979         if (child_pid == 0) {
2980                 /* Child uses EXPECT not ASSERT to deliver status correctly. */
2981                 char buf = ' ';
2982                 struct timespec timeout = { };
2983 
2984                 /* Attach parent as tracer and stop. */
2985                 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2986                 EXPECT_EQ(0, raise(SIGSTOP));
2987 
2988                 EXPECT_EQ(0, close(pipefd[1]));
2989 
2990                 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2991                         TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2992                 }
2993 
2994                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2995                 EXPECT_EQ(0, ret) {
2996                         TH_LOG("Failed to install filter!");
2997                 }
2998 
2999                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
3000                         TH_LOG("Failed to read() sync from parent");
3001                 }
3002                 EXPECT_EQ('.', buf) {
3003                         TH_LOG("Failed to get sync data from read()");
3004                 }
3005 
3006                 /* Start nanosleep to be interrupted. */
3007                 timeout.tv_sec = 1;
3008                 errno = 0;
3009                 EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
3010                         TH_LOG("Call to nanosleep() failed (errno %d)", errno);
3011                 }
3012 
3013                 /* Read final sync from parent. */
3014                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
3015                         TH_LOG("Failed final read() from parent");
3016                 }
3017                 EXPECT_EQ('!', buf) {
3018                         TH_LOG("Failed to get final data from read()");
3019                 }
3020 
3021                 /* Directly report the status of our test harness results. */
3022                 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
3023                                                      : EXIT_FAILURE);
3024         }
3025         EXPECT_EQ(0, close(pipefd[0]));
3026 
3027         /* Attach to child, setup options, and release. */
3028         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3029         ASSERT_EQ(true, WIFSTOPPED(status));
3030         ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
3031                             PTRACE_O_TRACESECCOMP));
3032         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3033         ASSERT_EQ(1, write(pipefd[1], ".", 1));
3034 
3035         /* Wait for nanosleep() to start. */
3036         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3037         ASSERT_EQ(true, WIFSTOPPED(status));
3038         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
3039         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
3040         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
3041         ASSERT_EQ(0x100, msg);
3042         ret = get_syscall(_metadata, child_pid);
3043         EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
3044 
3045         /* Might as well check siginfo for sanity while we're here. */
3046         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
3047         ASSERT_EQ(SIGTRAP, info.si_signo);
3048         ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
3049         EXPECT_EQ(0, info.si_errno);
3050         EXPECT_EQ(getuid(), info.si_uid);
3051         /* Verify signal delivery came from child (seccomp-triggered). */
3052         EXPECT_EQ(child_pid, info.si_pid);
3053 
3054         /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
3055         ASSERT_EQ(0, kill(child_pid, SIGSTOP));
3056         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3057         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3058         ASSERT_EQ(true, WIFSTOPPED(status));
3059         ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
3060         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
3061         /*
3062          * There is no siginfo on SIGSTOP any more, so we can't verify
3063          * signal delivery came from parent now (getpid() == info.si_pid).
3064          * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
3065          * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
3066          */
3067         EXPECT_EQ(SIGSTOP, info.si_signo);
3068 
3069         /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
3070         ASSERT_EQ(0, kill(child_pid, SIGCONT));
3071         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3072         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3073         ASSERT_EQ(true, WIFSTOPPED(status));
3074         ASSERT_EQ(SIGCONT, WSTOPSIG(status));
3075         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3076 
3077         /* Wait for restart_syscall() to start. */
3078         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3079         ASSERT_EQ(true, WIFSTOPPED(status));
3080         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
3081         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
3082         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
3083 
3084         ASSERT_EQ(0x200, msg);
3085         ret = get_syscall(_metadata, child_pid);
3086 #if defined(__arm__)
3087         /*
3088          * FIXME:
3089          * - native ARM registers do NOT expose true syscall.
3090          * - compat ARM registers on ARM64 DO expose true syscall.
3091          */
3092         ASSERT_EQ(0, uname(&utsbuf));
3093         if (strncmp(utsbuf.machine, "arm", 3) == 0) {
3094                 EXPECT_EQ(__NR_nanosleep, ret);
3095         } else
3096 #endif
3097         {
3098                 EXPECT_EQ(__NR_restart_syscall, ret);
3099         }
3100 
3101         /* Write again to end test. */
3102         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
3103         ASSERT_EQ(1, write(pipefd[1], "!", 1));
3104         EXPECT_EQ(0, close(pipefd[1]));
3105 
3106         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
3107         if (WIFSIGNALED(status) || WEXITSTATUS(status))
3108                 _metadata->passed = 0;
3109 }
3110 
3111 TEST_SIGNAL(filter_flag_log, SIGSYS)
3112 {
3113         struct sock_filter allow_filter[] = {
3114                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3115         };
3116         struct sock_filter kill_filter[] = {
3117                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
3118                         offsetof(struct seccomp_data, nr)),
3119                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
3120                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
3121                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3122         };
3123         struct sock_fprog allow_prog = {
3124                 .len = (unsigned short)ARRAY_SIZE(allow_filter),
3125                 .filter = allow_filter,
3126         };
3127         struct sock_fprog kill_prog = {
3128                 .len = (unsigned short)ARRAY_SIZE(kill_filter),
3129                 .filter = kill_filter,
3130         };
3131         long ret;
3132         pid_t parent = getppid();
3133 
3134         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3135         ASSERT_EQ(0, ret);
3136 
3137         /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
3138         ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
3139                       &allow_prog);
3140         ASSERT_NE(ENOSYS, errno) {
3141                 TH_LOG("Kernel does not support seccomp syscall!");
3142         }
3143         EXPECT_NE(0, ret) {
3144                 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
3145         }
3146         EXPECT_EQ(EINVAL, errno) {
3147                 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
3148         }
3149 
3150         /* Verify that a simple, permissive filter can be added with no flags */
3151         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
3152         EXPECT_EQ(0, ret);
3153 
3154         /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
3155         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3156                       &allow_prog);
3157         ASSERT_NE(EINVAL, errno) {
3158                 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
3159         }
3160         EXPECT_EQ(0, ret);
3161 
3162         /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
3163         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3164                       &kill_prog);
3165         EXPECT_EQ(0, ret);
3166 
3167         EXPECT_EQ(parent, syscall(__NR_getppid));
3168         /* getpid() should never return. */
3169         EXPECT_EQ(0, syscall(__NR_getpid));
3170 }
3171 
3172 TEST(get_action_avail)
3173 {
3174         __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
3175                             SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
3176                             SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
3177         __u32 unknown_action = 0x10000000U;
3178         int i;
3179         long ret;
3180 
3181         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
3182         ASSERT_NE(ENOSYS, errno) {
3183                 TH_LOG("Kernel does not support seccomp syscall!");
3184         }
3185         ASSERT_NE(EINVAL, errno) {
3186                 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
3187         }
3188         EXPECT_EQ(ret, 0);
3189 
3190         for (i = 0; i < ARRAY_SIZE(actions); i++) {
3191                 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
3192                 EXPECT_EQ(ret, 0) {
3193                         TH_LOG("Expected action (0x%X) not available!",
3194                                actions[i]);
3195                 }
3196         }
3197 
3198         /* Check that an unknown action is handled properly (EOPNOTSUPP) */
3199         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
3200         EXPECT_EQ(ret, -1);
3201         EXPECT_EQ(errno, EOPNOTSUPP);
3202 }
3203 
3204 TEST(get_metadata)
3205 {
3206         pid_t pid;
3207         int pipefd[2];
3208         char buf;
3209         struct seccomp_metadata md;
3210         long ret;
3211 
3212         /* Only real root can get metadata. */
3213         if (geteuid()) {
3214                 SKIP(return, "get_metadata requires real root");
3215                 return;
3216         }
3217 
3218         ASSERT_EQ(0, pipe(pipefd));
3219 
3220         pid = fork();
3221         ASSERT_GE(pid, 0);
3222         if (pid == 0) {
3223                 struct sock_filter filter[] = {
3224                         BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3225                 };
3226                 struct sock_fprog prog = {
3227                         .len = (unsigned short)ARRAY_SIZE(filter),
3228                         .filter = filter,
3229                 };
3230 
3231                 /* one with log, one without */
3232                 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3233                                      SECCOMP_FILTER_FLAG_LOG, &prog));
3234                 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3235 
3236                 EXPECT_EQ(0, close(pipefd[0]));
3237                 ASSERT_EQ(1, write(pipefd[1], "1", 1));
3238                 ASSERT_EQ(0, close(pipefd[1]));
3239 
3240                 while (1)
3241                         sleep(100);
3242         }
3243 
3244         ASSERT_EQ(0, close(pipefd[1]));
3245         ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3246 
3247         ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3248         ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3249 
3250         /* Past here must not use ASSERT or child process is never killed. */
3251 
3252         md.filter_off = 0;
3253         errno = 0;
3254         ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3255         EXPECT_EQ(sizeof(md), ret) {
3256                 if (errno == EINVAL)
3257                         SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3258         }
3259 
3260         EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3261         EXPECT_EQ(md.filter_off, 0);
3262 
3263         md.filter_off = 1;
3264         ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3265         EXPECT_EQ(sizeof(md), ret);
3266         EXPECT_EQ(md.flags, 0);
3267         EXPECT_EQ(md.filter_off, 1);
3268 
3269 skip:
3270         ASSERT_EQ(0, kill(pid, SIGKILL));
3271 }
3272 
3273 static int user_notif_syscall(int nr, unsigned int flags)
3274 {
3275         struct sock_filter filter[] = {
3276                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
3277                         offsetof(struct seccomp_data, nr)),
3278                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
3279                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
3280                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3281         };
3282 
3283         struct sock_fprog prog = {
3284                 .len = (unsigned short)ARRAY_SIZE(filter),
3285                 .filter = filter,
3286         };
3287 
3288         return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
3289 }
3290 
3291 #define USER_NOTIF_MAGIC INT_MAX
3292 TEST(user_notification_basic)
3293 {
3294         pid_t pid;
3295         long ret;
3296         int status, listener;
3297         struct seccomp_notif req = {};
3298         struct seccomp_notif_resp resp = {};
3299         struct pollfd pollfd;
3300 
3301         struct sock_filter filter[] = {
3302                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3303         };
3304         struct sock_fprog prog = {
3305                 .len = (unsigned short)ARRAY_SIZE(filter),
3306                 .filter = filter,
3307         };
3308 
3309         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3310         ASSERT_EQ(0, ret) {
3311                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3312         }
3313 
3314         pid = fork();
3315         ASSERT_GE(pid, 0);
3316 
3317         /* Check that we get -ENOSYS with no listener attached */
3318         if (pid == 0) {
3319                 if (user_notif_syscall(__NR_getppid, 0) < 0)
3320                         exit(1);
3321                 ret = syscall(__NR_getppid);
3322                 exit(ret >= 0 || errno != ENOSYS);
3323         }
3324 
3325         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3326         EXPECT_EQ(true, WIFEXITED(status));
3327         EXPECT_EQ(0, WEXITSTATUS(status));
3328 
3329         /* Add some no-op filters for grins. */
3330         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3331         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3332         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3333         EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3334 
3335         /* Check that the basic notification machinery works */
3336         listener = user_notif_syscall(__NR_getppid,
3337                                       SECCOMP_FILTER_FLAG_NEW_LISTENER);
3338         ASSERT_GE(listener, 0);
3339 
3340         /* Installing a second listener in the chain should EBUSY */
3341         EXPECT_EQ(user_notif_syscall(__NR_getppid,
3342                                      SECCOMP_FILTER_FLAG_NEW_LISTENER),
3343                   -1);
3344         EXPECT_EQ(errno, EBUSY);
3345 
3346         pid = fork();
3347         ASSERT_GE(pid, 0);
3348 
3349         if (pid == 0) {
3350                 ret = syscall(__NR_getppid);
3351                 exit(ret != USER_NOTIF_MAGIC);
3352         }
3353 
3354         pollfd.fd = listener;
3355         pollfd.events = POLLIN | POLLOUT;
3356 
3357         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3358         EXPECT_EQ(pollfd.revents, POLLIN);
3359 
3360         /* Test that we can't pass garbage to the kernel. */
3361         memset(&req, 0, sizeof(req));
3362         req.pid = -1;
3363         errno = 0;
3364         ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
3365         EXPECT_EQ(-1, ret);
3366         EXPECT_EQ(EINVAL, errno);
3367 
3368         if (ret) {
3369                 req.pid = 0;
3370                 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3371         }
3372 
3373         pollfd.fd = listener;
3374         pollfd.events = POLLIN | POLLOUT;
3375 
3376         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3377         EXPECT_EQ(pollfd.revents, POLLOUT);
3378 
3379         EXPECT_EQ(req.data.nr,  __NR_getppid);
3380 
3381         resp.id = req.id;
3382         resp.error = 0;
3383         resp.val = USER_NOTIF_MAGIC;
3384 
3385         /* check that we make sure flags == 0 */
3386         resp.flags = 1;
3387         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3388         EXPECT_EQ(errno, EINVAL);
3389 
3390         resp.flags = 0;
3391         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3392 
3393         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3394         EXPECT_EQ(true, WIFEXITED(status));
3395         EXPECT_EQ(0, WEXITSTATUS(status));
3396 }
3397 
3398 TEST(user_notification_with_tsync)
3399 {
3400         int ret;
3401         unsigned int flags;
3402 
3403         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3404         ASSERT_EQ(0, ret) {
3405                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3406         }
3407 
3408         /* these were exclusive */
3409         flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
3410                 SECCOMP_FILTER_FLAG_TSYNC;
3411         ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
3412         ASSERT_EQ(EINVAL, errno);
3413 
3414         /* but now they're not */
3415         flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
3416         ret = user_notif_syscall(__NR_getppid, flags);
3417         close(ret);
3418         ASSERT_LE(0, ret);
3419 }
3420 
3421 TEST(user_notification_kill_in_middle)
3422 {
3423         pid_t pid;
3424         long ret;
3425         int listener;
3426         struct seccomp_notif req = {};
3427         struct seccomp_notif_resp resp = {};
3428 
3429         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3430         ASSERT_EQ(0, ret) {
3431                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3432         }
3433 
3434         listener = user_notif_syscall(__NR_getppid,
3435                                       SECCOMP_FILTER_FLAG_NEW_LISTENER);
3436         ASSERT_GE(listener, 0);
3437 
3438         /*
3439          * Check that nothing bad happens when we kill the task in the middle
3440          * of a syscall.
3441          */
3442         pid = fork();
3443         ASSERT_GE(pid, 0);
3444 
3445         if (pid == 0) {
3446                 ret = syscall(__NR_getppid);
3447                 exit(ret != USER_NOTIF_MAGIC);
3448         }
3449 
3450         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3451         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3452 
3453         EXPECT_EQ(kill(pid, SIGKILL), 0);
3454         EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3455 
3456         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3457 
3458         resp.id = req.id;
3459         ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3460         EXPECT_EQ(ret, -1);
3461         EXPECT_EQ(errno, ENOENT);
3462 }
3463 
/* fd that signal_handler() writes its "handled" byte to; -1 until a test sets it. */
static int handled = -1;

/*
 * Signal handler: report that the signal ran by writing one byte ("c") to
 * the fd stored in @handled.
 *
 * Only write(2) is used, including for the failure diagnostic, because
 * perror() is not async-signal-safe. errno is saved and restored so that
 * the interrupted code never sees a value set inside the handler.
 */
static void signal_handler(int signo)
{
	static const char err_msg[] = "write from signal failed\n";
	int saved_errno = errno;

	(void)signo;

	if (write(handled, "c", 1) != 1) {
		/* Best effort only: nothing more can be done if this fails too. */
		if (write(STDERR_FILENO, err_msg, sizeof(err_msg) - 1) < 0) {
			/* ignored */
		}
	}

	errno = saved_errno;
}
3471 
3472 TEST(user_notification_signal)
3473 {
3474         pid_t pid;
3475         long ret;
3476         int status, listener, sk_pair[2];
3477         struct seccomp_notif req = {};
3478         struct seccomp_notif_resp resp = {};
3479         char c;
3480 
3481         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3482         ASSERT_EQ(0, ret) {
3483                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3484         }
3485 
3486         ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3487 
3488         listener = user_notif_syscall(__NR_gettid,
3489                                       SECCOMP_FILTER_FLAG_NEW_LISTENER);
3490         ASSERT_GE(listener, 0);
3491 
3492         pid = fork();
3493         ASSERT_GE(pid, 0);
3494 
3495         if (pid == 0) {
3496                 close(sk_pair[0]);
3497                 handled = sk_pair[1];
3498                 if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3499                         perror("signal");
3500                         exit(1);
3501                 }
3502                 /*
3503                  * ERESTARTSYS behavior is a bit hard to test, because we need
3504                  * to rely on a signal that has not yet been handled. Let's at
3505                  * least check that the error code gets propagated through, and
3506                  * hope that it doesn't break when there is actually a signal :)
3507                  */
3508                 ret = syscall(__NR_gettid);
3509                 exit(!(ret == -1 && errno == 512));
3510         }
3511 
3512         close(sk_pair[1]);
3513 
3514         memset(&req, 0, sizeof(req));
3515         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3516 
3517         EXPECT_EQ(kill(pid, SIGUSR1), 0);
3518 
3519         /*
3520          * Make sure the signal really is delivered, which means we're not
3521          * stuck in the user notification code any more and the notification
3522          * should be dead.
3523          */
3524         EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3525 
3526         resp.id = req.id;
3527         resp.error = -EPERM;
3528         resp.val = 0;
3529 
3530         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3531         EXPECT_EQ(errno, ENOENT);
3532 
3533         memset(&req, 0, sizeof(req));
3534         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3535 
3536         resp.id = req.id;
3537         resp.error = -512; /* -ERESTARTSYS */
3538         resp.val = 0;
3539 
3540         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3541 
3542         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3543         EXPECT_EQ(true, WIFEXITED(status));
3544         EXPECT_EQ(0, WEXITSTATUS(status));
3545 }
3546 
3547 TEST(user_notification_closed_listener)
3548 {
3549         pid_t pid;
3550         long ret;
3551         int status, listener;
3552 
3553         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3554         ASSERT_EQ(0, ret) {
3555                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3556         }
3557 
3558         listener = user_notif_syscall(__NR_getppid,
3559                                       SECCOMP_FILTER_FLAG_NEW_LISTENER);
3560         ASSERT_GE(listener, 0);
3561 
3562         /*
3563          * Check that we get an ENOSYS when the listener is closed.
3564          */
3565         pid = fork();
3566         ASSERT_GE(pid, 0);
3567         if (pid == 0) {
3568                 close(listener);
3569                 ret = syscall(__NR_getppid);
3570                 exit(ret != -1 && errno != ENOSYS);
3571         }
3572 
3573         close(listener);
3574 
3575         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3576         EXPECT_EQ(true, WIFEXITED(status));
3577         EXPECT_EQ(0, WEXITSTATUS(status));
3578 }
3579 
3580 /*
3581  * Check that a pid in a child namespace still shows up as valid in ours.
3582  */
3583 TEST(user_notification_child_pid_ns)
3584 {
3585         pid_t pid;
3586         int status, listener;
3587         struct seccomp_notif req = {};
3588         struct seccomp_notif_resp resp = {};
3589 
3590         ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) {
3591                 if (errno == EINVAL)
3592                         SKIP(return, "kernel missing CLONE_NEWUSER support");
3593         };
3594 
3595         listener = user_notif_syscall(__NR_getppid,
3596                                       SECCOMP_FILTER_FLAG_NEW_LISTENER);
3597         ASSERT_GE(listener, 0);
3598 
3599         pid = fork();
3600         ASSERT_GE(pid, 0);
3601 
3602         if (pid == 0)
3603                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3604 
3605         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3606         EXPECT_EQ(req.pid, pid);
3607 
3608         resp.id = req.id;
3609         resp.error = 0;
3610         resp.val = USER_NOTIF_MAGIC;
3611 
3612         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3613 
3614         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3615         EXPECT_EQ(true, WIFEXITED(status));
3616         EXPECT_EQ(0, WEXITSTATUS(status));
3617         close(listener);
3618 }
3619 
3620 /*
3621  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3622  * invalid.
3623  */
3624 TEST(user_notification_sibling_pid_ns)
3625 {
3626         pid_t pid, pid2;
3627         int status, listener;
3628         struct seccomp_notif req = {};
3629         struct seccomp_notif_resp resp = {};
3630 
3631         ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3632                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3633         }
3634 
3635         listener = user_notif_syscall(__NR_getppid,
3636                                       SECCOMP_FILTER_FLAG_NEW_LISTENER);
3637         ASSERT_GE(listener, 0);
3638 
3639         pid = fork();
3640         ASSERT_GE(pid, 0);
3641 
3642         if (pid == 0) {
3643                 ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3644 
3645                 pid2 = fork();
3646                 ASSERT_GE(pid2, 0);
3647 
3648                 if (pid2 == 0)
3649                         exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3650 
3651                 EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3652                 EXPECT_EQ(true, WIFEXITED(status));
3653                 EXPECT_EQ(0, WEXITSTATUS(status));
3654                 exit(WEXITSTATUS(status));
3655         }
3656 
3657         /* Create the sibling ns, and sibling in it. */
3658         ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
3659                 if (errno == EPERM)
3660                         SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
3661         }
3662         ASSERT_EQ(errno, 0);
3663 
3664         pid2 = fork();
3665         ASSERT_GE(pid2, 0);
3666 
3667         if (pid2 == 0) {
3668                 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3669                 /*
3670                  * The pid should be 0, i.e. the task is in some namespace that
3671                  * we can't "see".
3672                  */
3673                 EXPECT_EQ(req.pid, 0);
3674 
3675                 resp.id = req.id;
3676                 resp.error = 0;
3677                 resp.val = USER_NOTIF_MAGIC;
3678 
3679                 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3680                 exit(0);
3681         }
3682 
3683         close(listener);
3684 
3685         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3686         EXPECT_EQ(true, WIFEXITED(status));
3687         EXPECT_EQ(0, WEXITSTATUS(status));
3688 
3689         EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3690         EXPECT_EQ(true, WIFEXITED(status));
3691         EXPECT_EQ(0, WEXITSTATUS(status));
3692 }
3693 
3694 TEST(user_notification_fault_recv)
3695 {
3696         pid_t pid;
3697         int status, listener;
3698         struct seccomp_notif req = {};
3699         struct seccomp_notif_resp resp = {};
3700 
3701         ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3702 
3703         listener = user_notif_syscall(__NR_getppid,
3704                                       SECCOMP_FILTER_FLAG_NEW_LISTENER);
3705         ASSERT_GE(listener, 0);
3706 
3707         pid = fork();
3708         ASSERT_GE(pid, 0);
3709 
3710         if (pid == 0)
3711                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3712 
3713         /* Do a bad recv() */
3714         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3715         EXPECT_EQ(errno, EFAULT);
3716 
3717         /* We should still be able to receive this notification, though. */
3718         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3719         EXPECT_EQ(req.pid, pid);
3720 
3721         resp.id = req.id;
3722         resp.error = 0;
3723         resp.val = USER_NOTIF_MAGIC;
3724 
3725         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3726 
3727         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3728         EXPECT_EQ(true, WIFEXITED(status));
3729         EXPECT_EQ(0, WEXITSTATUS(status));
3730 }
3731 
3732 TEST(seccomp_get_notif_sizes)
3733 {
3734         struct seccomp_notif_sizes sizes;
3735 
3736         ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
3737         EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
3738         EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
3739 }
3740 
3741 TEST(user_notification_continue)
3742 {
3743         pid_t pid;
3744         long ret;
3745         int status, listener;
3746         struct seccomp_notif req = {};
3747         struct seccomp_notif_resp resp = {};
3748         struct pollfd pollfd;
3749 
3750         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3751         ASSERT_EQ(0, ret) {
3752                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3753         }
3754 
3755         listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3756         ASSERT_GE(listener, 0);
3757 
3758         pid = fork();
3759         ASSERT_GE(pid, 0);
3760 
3761         if (pid == 0) {
3762                 int dup_fd, pipe_fds[2];
3763                 pid_t self;
3764 
3765                 ASSERT_GE(pipe(pipe_fds), 0);
3766 
3767                 dup_fd = dup(pipe_fds[0]);
3768                 ASSERT_GE(dup_fd, 0);
3769                 EXPECT_NE(pipe_fds[0], dup_fd);
3770 
3771                 self = getpid();
3772                 ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0);
3773                 exit(0);
3774         }
3775 
3776         pollfd.fd = listener;
3777         pollfd.events = POLLIN | POLLOUT;
3778 
3779         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3780         EXPECT_EQ(pollfd.revents, POLLIN);
3781 
3782         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3783 
3784         pollfd.fd = listener;
3785         pollfd.events = POLLIN | POLLOUT;
3786 
3787         EXPECT_GT(poll(&pollfd, 1, -1), 0);
3788         EXPECT_EQ(pollfd.revents, POLLOUT);
3789 
3790         EXPECT_EQ(req.data.nr, __NR_dup);
3791 
3792         resp.id = req.id;
3793         resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3794 
3795         /*
3796          * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
3797          * args be set to 0.
3798          */
3799         resp.error = 0;
3800         resp.val = USER_NOTIF_MAGIC;
3801         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3802         EXPECT_EQ(errno, EINVAL);
3803 
3804         resp.error = USER_NOTIF_MAGIC;
3805         resp.val = 0;
3806         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3807         EXPECT_EQ(errno, EINVAL);
3808 
3809         resp.error = 0;
3810         resp.val = 0;
3811         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3812                 if (errno == EINVAL)
3813                         SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3814         }
3815 
3816 skip:
3817         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3818         EXPECT_EQ(true, WIFEXITED(status));
3819         EXPECT_EQ(0, WEXITSTATUS(status)) {
3820                 if (WEXITSTATUS(status) == 2) {
3821                         SKIP(return, "Kernel does not support kcmp() syscall");
3822                         return;
3823                 }
3824         }
3825 }
3826 
3827 TEST(user_notification_filter_empty)
3828 {
3829         pid_t pid;
3830         long ret;
3831         int status;
3832         struct pollfd pollfd;
3833         struct __clone_args args = {
3834                 .flags = CLONE_FILES,
3835                 .exit_signal = SIGCHLD,
3836         };
3837 
3838         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3839         ASSERT_EQ(0, ret) {
3840                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3841         }
3842 
3843         pid = sys_clone3(&args, sizeof(args));
3844         ASSERT_GE(pid, 0);
3845 
3846         if (pid == 0) {
3847                 int listener;
3848 
3849                 listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3850                 if (listener < 0)
3851                         _exit(EXIT_FAILURE);
3852 
3853                 if (dup2(listener, 200) != 200)
3854                         _exit(EXIT_FAILURE);
3855 
3856                 close(listener);
3857 
3858                 _exit(EXIT_SUCCESS);
3859         }
3860 
3861         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3862         EXPECT_EQ(true, WIFEXITED(status));
3863         EXPECT_EQ(0, WEXITSTATUS(status));
3864 
3865         /*
3866          * The seccomp filter has become unused so we should be notified once
3867          * the kernel gets around to cleaning up task struct.
3868          */
3869         pollfd.fd = 200;
3870         pollfd.events = POLLHUP;
3871 
3872         EXPECT_GT(poll(&pollfd, 1, 2000), 0);
3873         EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
3874 }
3875 
/*
 * Thread entry point that does nothing: it ignores @data and returns NULL
 * straight away.
 */
static void *do_thread(void *data)
{
	void *result = NULL;

	(void)data;

	return result;
}
3880 
3881 TEST(user_notification_filter_empty_threaded)
3882 {
3883         pid_t pid;
3884         long ret;
3885         int status;
3886         struct pollfd pollfd;
3887         struct __clone_args args = {
3888                 .flags = CLONE_FILES,
3889                 .exit_signal = SIGCHLD,
3890         };
3891 
3892         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3893         ASSERT_EQ(0, ret) {
3894                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3895         }
3896 
3897         pid = sys_clone3(&args, sizeof(args));
3898         ASSERT_GE(pid, 0);
3899 
3900         if (pid == 0) {
3901                 pid_t pid1, pid2;
3902                 int listener, status;
3903                 pthread_t thread;
3904 
3905                 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3906                 if (listener < 0)
3907                         _exit(EXIT_FAILURE);
3908 
3909                 if (dup2(listener, 200) != 200)
3910                         _exit(EXIT_FAILURE);
3911 
3912                 close(listener);
3913 
3914                 pid1 = fork();
3915                 if (pid1 < 0)
3916                         _exit(EXIT_FAILURE);
3917 
3918                 if (pid1 == 0)
3919                         _exit(EXIT_SUCCESS);
3920 
3921                 pid2 = fork();
3922                 if (pid2 < 0)
3923                         _exit(EXIT_FAILURE);
3924 
3925                 if (pid2 == 0)
3926                         _exit(EXIT_SUCCESS);
3927 
3928                 if (pthread_create(&thread, NULL, do_thread, NULL) ||
3929                     pthread_join(thread, NULL))
3930                         _exit(EXIT_FAILURE);
3931 
3932                 if (pthread_create(&thread, NULL, do_thread, NULL) ||
3933                     pthread_join(thread, NULL))
3934                         _exit(EXIT_FAILURE);
3935 
3936                 if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) ||
3937                     WEXITSTATUS(status))
3938                         _exit(EXIT_FAILURE);
3939 
3940                 if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) ||
3941                     WEXITSTATUS(status))
3942                         _exit(EXIT_FAILURE);
3943 
3944                 exit(EXIT_SUCCESS);
3945         }
3946 
3947         EXPECT_EQ(waitpid(pid, &status, 0), pid);
3948         EXPECT_EQ(true, WIFEXITED(status));
3949         EXPECT_EQ(0, WEXITSTATUS(status));
3950 
3951         /*
3952          * The seccomp filter has become unused so we should be notified once
3953          * the kernel gets around to cleaning up task struct.
3954          */
3955         pollfd.fd = 200;
3956         pollfd.events = POLLHUP;
3957 
3958         EXPECT_GT(poll(&pollfd, 1, 2000), 0);
3959         EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
3960 }
3961 
3962 TEST(user_notification_addfd)
3963 {
3964         pid_t pid;
3965         long ret;
3966         int status, listener, memfd, fd, nextfd;
3967         struct seccomp_notif_addfd addfd = {};
3968         struct seccomp_notif_addfd_small small = {};
3969         struct seccomp_notif_addfd_big big = {};
3970         struct seccomp_notif req = {};
3971         struct seccomp_notif_resp resp = {};
3972         /* 100 ms */
3973         struct timespec delay = { .tv_nsec = 100000000 };
3974 
3975         /* There may be arbitrary already-open fds at test start. */
3976         memfd = memfd_create("test", 0);
3977         ASSERT_GE(memfd, 0);
3978         nextfd = memfd + 1;
3979 
3980         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3981         ASSERT_EQ(0, ret) {
3982                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3983         }
3984 
3985         /* fd: 4 */
3986         /* Check that the basic notification machinery works */
3987         listener = user_notif_syscall(__NR_getppid,
3988                                       SECCOMP_FILTER_FLAG_NEW_LISTENER);
3989         ASSERT_EQ(listener, nextfd++);
3990 
3991         pid = fork();
3992         ASSERT_GE(pid, 0);
3993 
3994         if (pid == 0) {
3995                 /* fds will be added and this value is expected */
3996                 if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
3997                         exit(1);
3998 
3999                 /* Atomic addfd+send is received here. Check it is a valid fd */
4000                 if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
4001                         exit(1);
4002 
4003                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
4004         }
4005 
4006         ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4007 
4008         addfd.srcfd = memfd;
4009         addfd.newfd = 0;
4010         addfd.id = req.id;
4011         addfd.flags = 0x0;
4012 
4013         /* Verify bad newfd_flags cannot be set */
4014         addfd.newfd_flags = ~O_CLOEXEC;
4015         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4016         EXPECT_EQ(errno, EINVAL);
4017         addfd.newfd_flags = O_CLOEXEC;
4018 
4019         /* Verify bad flags cannot be set */
4020         addfd.flags = 0xff;
4021         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4022         EXPECT_EQ(errno, EINVAL);
4023         addfd.flags = 0;
4024 
4025         /* Verify that remote_fd cannot be set without setting flags */
4026         addfd.newfd = 1;
4027         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4028         EXPECT_EQ(errno, EINVAL);
4029         addfd.newfd = 0;
4030 
4031         /* Verify small size cannot be set */
4032         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
4033         EXPECT_EQ(errno, EINVAL);
4034 
4035         /* Verify we can't send bits filled in unknown buffer area */
4036         memset(&big, 0xAA, sizeof(big));
4037         big.addfd = addfd;
4038         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
4039         EXPECT_EQ(errno, E2BIG);
4040 
4041 
4042         /* Verify we can set an arbitrary remote fd */
4043         fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4044         EXPECT_EQ(fd, nextfd++);
4045         EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
4046 
4047         /* Verify we can set an arbitrary remote fd with large size */
4048         memset(&big, 0x0, sizeof(big));
4049         big.addfd = addfd;
4050         fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
4051         EXPECT_EQ(fd, nextfd++);
4052 
4053         /* Verify we can set a specific remote fd */
4054         addfd.newfd = 42;
4055         addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
4056         fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4057         EXPECT_EQ(fd, 42);
4058         EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
4059 
4060         /* Resume syscall */
4061         resp.id = req.id;
4062         resp.error = 0;
4063         resp.val = USER_NOTIF_MAGIC;
4064         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4065 
4066         /*
4067          * This sets the ID of the ADD FD to the last request plus 1. The
4068          * notification ID increments 1 per notification.
4069          */
4070         addfd.id = req.id + 1;
4071 
4072         /* This spins until the underlying notification is generated */
4073         while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
4074                errno != -EINPROGRESS)
4075                 nanosleep(&delay, NULL);
4076 
4077         memset(&req, 0, sizeof(req));
4078         ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4079         ASSERT_EQ(addfd.id, req.id);
4080 
4081         /* Verify we can do an atomic addfd and send */
4082         addfd.newfd = 0;
4083         addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
4084         fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4085         /*
4086          * Child has earlier "low" fds and now 42, so we expect the next
4087          * lowest available fd to be assigned here.
4088          */
4089         EXPECT_EQ(fd, nextfd++);
4090         EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
4091 
4092         /*
4093          * This sets the ID of the ADD FD to the last request plus 1. The
4094          * notification ID increments 1 per notification.
4095          */
4096         addfd.id = req.id + 1;
4097 
4098         /* This spins until the underlying notification is generated */
4099         while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
4100                errno != -EINPROGRESS)
4101                 nanosleep(&delay, NULL);
4102 
4103         memset(&req, 0, sizeof(req));
4104         ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4105         ASSERT_EQ(addfd.id, req.id);
4106 
4107         resp.id = req.id;
4108         resp.error = 0;
4109         resp.val = USER_NOTIF_MAGIC;
4110         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4111 
4112         /* Wait for child to finish. */
4113         EXPECT_EQ(waitpid(pid, &status, 0), pid);
4114         EXPECT_EQ(true, WIFEXITED(status));
4115         EXPECT_EQ(0, WEXITSTATUS(status));
4116 
4117         close(memfd);
4118 }
4119 
4120 TEST(user_notification_addfd_rlimit)
4121 {
4122         pid_t pid;
4123         long ret;
4124         int status, listener, memfd;
4125         struct seccomp_notif_addfd addfd = {};
4126         struct seccomp_notif req = {};
4127         struct seccomp_notif_resp resp = {};
4128         const struct rlimit lim = {
4129                 .rlim_cur       = 0,
4130                 .rlim_max       = 0,
4131         };
4132 
4133         memfd = memfd_create("test", 0);
4134         ASSERT_GE(memfd, 0);
4135 
4136         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4137         ASSERT_EQ(0, ret) {
4138                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4139         }
4140 
4141         /* Check that the basic notification machinery works */
4142         listener = user_notif_syscall(__NR_getppid,
4143                                       SECCOMP_FILTER_FLAG_NEW_LISTENER);
4144         ASSERT_GE(listener, 0);
4145 
4146         pid = fork();
4147         ASSERT_GE(pid, 0);
4148 
4149         if (pid == 0)
4150                 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
4151 
4152 
4153         ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4154 
4155         ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
4156 
4157         addfd.srcfd = memfd;
4158         addfd.newfd_flags = O_CLOEXEC;
4159         addfd.newfd = 0;
4160         addfd.id = req.id;
4161         addfd.flags = 0;
4162 
4163         /* Should probably spot check /proc/sys/fs/file-nr */
4164         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4165         EXPECT_EQ(errno, EMFILE);
4166 
4167         addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
4168         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4169         EXPECT_EQ(errno, EMFILE);
4170 
4171         addfd.newfd = 100;
4172         addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
4173         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4174         EXPECT_EQ(errno, EBADF);
4175 
4176         resp.id = req.id;
4177         resp.error = 0;
4178         resp.val = USER_NOTIF_MAGIC;
4179 
4180         EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4181 
4182         /* Wait for child to finish. */
4183         EXPECT_EQ(waitpid(pid, &status, 0), pid);
4184         EXPECT_EQ(true, WIFEXITED(status));
4185         EXPECT_EQ(0, WEXITSTATUS(status));
4186 
4187         close(memfd);
4188 }
4189 
4190 /*
4191  * TODO:
4192  * - expand NNP testing
4193  * - better arch-specific TRACE and TRAP handlers.
4194  * - endianness checking when appropriate
4195  * - 64-bit arg prodding
4196  * - arch value testing (x86 modes especially)
4197  * - verify that FILTER_FLAG_LOG filters generate log messages
4198  * - verify that RET_LOG generates log messages
4199  */
4200 
4201 TEST_HARNESS_MAIN
4202 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp