~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/seccomp/seccomp_bpf.c

Version: ~ [ linux-5.16-rc3 ] ~ [ linux-5.15.5 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.82 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.162 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.218 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.256 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.291 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.293 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
  3  * Use of this source code is governed by the GPLv2 license.
  4  *
  5  * Test code for seccomp bpf.
  6  */
  7 
  8 #include <sys/types.h>
  9 
 10 /*
 11  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 12  * we need to use the kernel's siginfo.h file and trick glibc
 13  * into accepting it.
 14  */
 15 #if !__GLIBC_PREREQ(2, 26)
 16 # include <asm/siginfo.h>
 17 # define __have_siginfo_t 1
 18 # define __have_sigval_t 1
 19 # define __have_sigevent_t 1
 20 #endif
 21 
 22 #include <errno.h>
 23 #include <linux/filter.h>
 24 #include <sys/prctl.h>
 25 #include <sys/ptrace.h>
 26 #include <sys/user.h>
 27 #include <linux/prctl.h>
 28 #include <linux/ptrace.h>
 29 #include <linux/seccomp.h>
 30 #include <pthread.h>
 31 #include <semaphore.h>
 32 #include <signal.h>
 33 #include <stddef.h>
 34 #include <stdbool.h>
 35 #include <string.h>
 36 #include <time.h>
 37 #include <linux/elf.h>
 38 #include <sys/uio.h>
 39 #include <sys/utsname.h>
 40 #include <sys/fcntl.h>
 41 #include <sys/mman.h>
 42 #include <sys/times.h>
 43 
 44 #define _GNU_SOURCE
 45 #include <unistd.h>
 46 #include <sys/syscall.h>
 47 
 48 #include "../kselftest_harness.h"
 49 
 50 #ifndef PR_SET_PTRACER
 51 # define PR_SET_PTRACER 0x59616d61
 52 #endif
 53 
 54 #ifndef PR_SET_NO_NEW_PRIVS
 55 #define PR_SET_NO_NEW_PRIVS 38
 56 #define PR_GET_NO_NEW_PRIVS 39
 57 #endif
 58 
 59 #ifndef PR_SECCOMP_EXT
 60 #define PR_SECCOMP_EXT 43
 61 #endif
 62 
 63 #ifndef SECCOMP_EXT_ACT
 64 #define SECCOMP_EXT_ACT 1
 65 #endif
 66 
 67 #ifndef SECCOMP_EXT_ACT_TSYNC
 68 #define SECCOMP_EXT_ACT_TSYNC 1
 69 #endif
 70 
 71 #ifndef SECCOMP_MODE_STRICT
 72 #define SECCOMP_MODE_STRICT 1
 73 #endif
 74 
 75 #ifndef SECCOMP_MODE_FILTER
 76 #define SECCOMP_MODE_FILTER 2
 77 #endif
 78 
 79 #ifndef SECCOMP_RET_KILL
 80 #define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
 81 #define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
 82 #define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
 83 #define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
 84 #define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
 85 
 86 /* Masks for the return value sections. */
 87 #define SECCOMP_RET_ACTION      0x7fff0000U
 88 #define SECCOMP_RET_DATA        0x0000ffffU
 89 
 90 struct seccomp_data {
 91         int nr;
 92         __u32 arch;
 93         __u64 instruction_pointer;
 94         __u64 args[6];
 95 };
 96 #endif
 97 
 98 #if __BYTE_ORDER == __LITTLE_ENDIAN
 99 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
100 #elif __BYTE_ORDER == __BIG_ENDIAN
101 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
102 #else
103 #error "wut? Unknown __BYTE_ORDER?!"
104 #endif
105 
106 #define SIBLING_EXIT_UNKILLED   0xbadbeef
107 #define SIBLING_EXIT_FAILURE    0xbadface
108 #define SIBLING_EXIT_NEWPRIVS   0xbadfeed
109 
110 TEST(mode_strict_support)
111 {
112         long ret;
113 
114         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
115         ASSERT_EQ(0, ret) {
116                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
117         }
118         syscall(__NR_exit, 1);
119 }
120 
121 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
122 {
123         long ret;
124 
125         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
126         ASSERT_EQ(0, ret) {
127                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
128         }
129         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
130                 NULL, NULL, NULL);
131         EXPECT_FALSE(true) {
132                 TH_LOG("Unreachable!");
133         }
134 }
135 
136 /* Note! This doesn't test no new privs behavior */
137 TEST(no_new_privs_support)
138 {
139         long ret;
140 
141         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
142         EXPECT_EQ(0, ret) {
143                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
144         }
145 }
146 
147 /* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
148 TEST(mode_filter_support)
149 {
150         long ret;
151 
152         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
153         ASSERT_EQ(0, ret) {
154                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
155         }
156         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
157         EXPECT_EQ(-1, ret);
158         EXPECT_EQ(EFAULT, errno) {
159                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
160         }
161 }
162 
163 TEST(mode_filter_without_nnp)
164 {
165         struct sock_filter filter[] = {
166                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
167         };
168         struct sock_fprog prog = {
169                 .len = (unsigned short)ARRAY_SIZE(filter),
170                 .filter = filter,
171         };
172         long ret;
173 
174         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
175         ASSERT_LE(0, ret) {
176                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
177         }
178         errno = 0;
179         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
180         /* Succeeds with CAP_SYS_ADMIN, fails without */
181         /* TODO(wad) check caps not euid */
182         if (geteuid()) {
183                 EXPECT_EQ(-1, ret);
184                 EXPECT_EQ(EACCES, errno);
185         } else {
186                 EXPECT_EQ(0, ret);
187         }
188 }
189 
190 #define MAX_INSNS_PER_PATH 32768
191 
192 TEST(filter_size_limits)
193 {
194         int i;
195         int count = BPF_MAXINSNS + 1;
196         struct sock_filter allow[] = {
197                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
198         };
199         struct sock_filter *filter;
200         struct sock_fprog prog = { };
201         long ret;
202 
203         filter = calloc(count, sizeof(*filter));
204         ASSERT_NE(NULL, filter);
205 
206         for (i = 0; i < count; i++)
207                 filter[i] = allow[0];
208 
209         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
210         ASSERT_EQ(0, ret);
211 
212         prog.filter = filter;
213         prog.len = count;
214 
215         /* Too many filter instructions in a single filter. */
216         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
217         ASSERT_NE(0, ret) {
218                 TH_LOG("Installing %d insn filter was allowed", prog.len);
219         }
220 
221         /* One less is okay, though. */
222         prog.len -= 1;
223         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
224         ASSERT_EQ(0, ret) {
225                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
226         }
227 }
228 
229 TEST(filter_chain_limits)
230 {
231         int i;
232         int count = BPF_MAXINSNS;
233         struct sock_filter allow[] = {
234                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
235         };
236         struct sock_filter *filter;
237         struct sock_fprog prog = { };
238         long ret;
239 
240         filter = calloc(count, sizeof(*filter));
241         ASSERT_NE(NULL, filter);
242 
243         for (i = 0; i < count; i++)
244                 filter[i] = allow[0];
245 
246         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
247         ASSERT_EQ(0, ret);
248 
249         prog.filter = filter;
250         prog.len = 1;
251 
252         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
253         ASSERT_EQ(0, ret);
254 
255         prog.len = count;
256 
257         /* Too many total filter instructions. */
258         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
259                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
260                 if (ret != 0)
261                         break;
262         }
263         ASSERT_NE(0, ret) {
264                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
265                        i, count, i * (count + 4));
266         }
267 }
268 
/*
 * With a filter already installed, a request to switch to strict mode is
 * expected to fail with EINVAL.
 */
TEST(mode_filter_cannot_move_to_strict)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};
	long rc;

	rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, rc);

	/* Enter filter mode with an allow-everything program. */
	rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
	ASSERT_EQ(0, rc);

	/* Now try to change modes. */
	rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
	EXPECT_EQ(-1, rc);
	EXPECT_EQ(EINVAL, errno);
}
290 
291 
/*
 * PR_GET_SECCOMP is expected to report 0 before any filter is installed
 * and 2 afterwards (2 is the value this file uses for SECCOMP_MODE_FILTER
 * when the headers do not provide it).
 */
TEST(mode_filter_get_seccomp)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};
	long rc;

	rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, rc);

	/* Mode before the filter goes in. */
	rc = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, rc);

	rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
	ASSERT_EQ(0, rc);

	/* Mode after the filter went in. */
	rc = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(2, rc);
}
315 
316 
/*
 * Install a one-instruction filter that returns SECCOMP_RET_ALLOW for
 * every syscall; installation must succeed.
 */
TEST(ALLOW_all)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};
	long rc;

	rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, rc);

	rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, rc);
}
334 
/*
 * A filter program with zero instructions is expected to be rejected
 * with EINVAL.
 */
TEST(empty_prog)
{
	/* Deliberately empty instruction array: ARRAY_SIZE() yields 0. */
	struct sock_filter insns[] = {
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};
	long rc;

	rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, rc);

	rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	EXPECT_EQ(-1, rc);
	EXPECT_EQ(EINVAL, errno);
}
352 
353 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
354 {
355         struct sock_filter filter[] = {
356                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
357         };
358         struct sock_fprog prog = {
359                 .len = (unsigned short)ARRAY_SIZE(filter),
360                 .filter = filter,
361         };
362         long ret;
363 
364         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
365         ASSERT_EQ(0, ret);
366 
367         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
368         ASSERT_EQ(0, ret);
369         EXPECT_EQ(0, syscall(__NR_getpid)) {
370                 TH_LOG("getpid() shouldn't ever return");
371         }
372 }
373 
374 /* return code >= 0x80000000 is unused. */
375 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
376 {
377         struct sock_filter filter[] = {
378                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
379         };
380         struct sock_fprog prog = {
381                 .len = (unsigned short)ARRAY_SIZE(filter),
382                 .filter = filter,
383         };
384         long ret;
385 
386         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
387         ASSERT_EQ(0, ret);
388 
389         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
390         ASSERT_EQ(0, ret);
391         EXPECT_EQ(0, syscall(__NR_getpid)) {
392                 TH_LOG("getpid() shouldn't ever return");
393         }
394 }
395 
396 TEST_SIGNAL(KILL_all, SIGSYS)
397 {
398         struct sock_filter filter[] = {
399                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
400         };
401         struct sock_fprog prog = {
402                 .len = (unsigned short)ARRAY_SIZE(filter),
403                 .filter = filter,
404         };
405         long ret;
406 
407         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
408         ASSERT_EQ(0, ret);
409 
410         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
411         ASSERT_EQ(0, ret);
412 }
413 
414 TEST_SIGNAL(KILL_one, SIGSYS)
415 {
416         struct sock_filter filter[] = {
417                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
418                         offsetof(struct seccomp_data, nr)),
419                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
420                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
421                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
422         };
423         struct sock_fprog prog = {
424                 .len = (unsigned short)ARRAY_SIZE(filter),
425                 .filter = filter,
426         };
427         long ret;
428         pid_t parent = getppid();
429 
430         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
431         ASSERT_EQ(0, ret);
432 
433         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
434         ASSERT_EQ(0, ret);
435 
436         EXPECT_EQ(parent, syscall(__NR_getppid));
437         /* getpid() should never return. */
438         EXPECT_EQ(0, syscall(__NR_getpid));
439 }
440 
441 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
442 {
443         void *fatal_address;
444         struct sock_filter filter[] = {
445                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
446                         offsetof(struct seccomp_data, nr)),
447                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
448                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
449                 /* Only both with lower 32-bit for now. */
450                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
451                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
452                         (unsigned long)&fatal_address, 0, 1),
453                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
454                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
455         };
456         struct sock_fprog prog = {
457                 .len = (unsigned short)ARRAY_SIZE(filter),
458                 .filter = filter,
459         };
460         long ret;
461         pid_t parent = getppid();
462         struct tms timebuf;
463         clock_t clock = times(&timebuf);
464 
465         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
466         ASSERT_EQ(0, ret);
467 
468         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
469         ASSERT_EQ(0, ret);
470 
471         EXPECT_EQ(parent, syscall(__NR_getppid));
472         EXPECT_LE(clock, syscall(__NR_times, &timebuf));
473         /* times() should never return. */
474         EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
475 }
476 
477 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
478 {
479 #ifndef __NR_mmap2
480         int sysno = __NR_mmap;
481 #else
482         int sysno = __NR_mmap2;
483 #endif
484         struct sock_filter filter[] = {
485                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
486                         offsetof(struct seccomp_data, nr)),
487                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
488                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
489                 /* Only both with lower 32-bit for now. */
490                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
491                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
492                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
493                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
494         };
495         struct sock_fprog prog = {
496                 .len = (unsigned short)ARRAY_SIZE(filter),
497                 .filter = filter,
498         };
499         long ret;
500         pid_t parent = getppid();
501         int fd;
502         void *map1, *map2;
503         int page_size = sysconf(_SC_PAGESIZE);
504 
505         ASSERT_LT(0, page_size);
506 
507         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
508         ASSERT_EQ(0, ret);
509 
510         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
511         ASSERT_EQ(0, ret);
512 
513         fd = open("/dev/zero", O_RDONLY);
514         ASSERT_NE(-1, fd);
515 
516         EXPECT_EQ(parent, syscall(__NR_getppid));
517         map1 = (void *)syscall(sysno,
518                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
519         EXPECT_NE(MAP_FAILED, map1);
520         /* mmap2() should never return. */
521         map2 = (void *)syscall(sysno,
522                  NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
523         EXPECT_EQ(MAP_FAILED, map2);
524 
525         /* The test failed, so clean up the resources. */
526         munmap(map1, page_size);
527         munmap(map2, page_size);
528         close(fd);
529 }
530 
531 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
/*
 * A filter that loads syscall_arg(6), one slot past the six-entry args[]
 * array of struct seccomp_data, is expected to be rejected with EINVAL.
 */
TEST(arg_out_of_range)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};
	long rc;

	rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, rc);

	rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	EXPECT_EQ(-1, rc);
	EXPECT_EQ(EINVAL, errno);
}
551 
/*
 * Filter that answers read() with SECCOMP_RET_ERRNO | E2BIG and allows
 * everything else. read() must then fail with errno set to E2BIG.
 */
TEST(ERRNO_valid)
{
	struct sock_filter insns[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* read -> ERRNO, anything else -> ALLOW */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};
	pid_t ppid = getppid();
	long rc;

	rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, rc);

	rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, rc);

	EXPECT_EQ(ppid, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(0, NULL, 0));
	EXPECT_EQ(E2BIG, errno);
}
578 
/*
 * Filter that answers read() with SECCOMP_RET_ERRNO carrying an errno
 * value of 0. The test expects read() to return 0 in that case.
 */
TEST(ERRNO_zero)
{
	struct sock_filter insns[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* read -> ERRNO, anything else -> ALLOW */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};
	pid_t ppid = getppid();
	long rc;

	rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, rc);

	rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, rc);

	EXPECT_EQ(ppid, syscall(__NR_getppid));
	/* "errno" of 0 is ok. */
	EXPECT_EQ(0, read(0, NULL, 0));
}
605 
/*
 * Filter that answers read() with SECCOMP_RET_ERRNO | 4096. The test
 * expects the errno seen by userspace to be capped at 4095.
 */
TEST(ERRNO_capped)
{
	struct sock_filter insns[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* read -> ERRNO, anything else -> ALLOW */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = (unsigned short)ARRAY_SIZE(insns),
	};
	pid_t ppid = getppid();
	long rc;

	rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, rc);

	rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, rc);

	EXPECT_EQ(ppid, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(0, NULL, 0));
	EXPECT_EQ(4095, errno);
}
632 
633 FIXTURE_DATA(TRAP) {
634         struct sock_fprog prog;
635 };
636 
637 FIXTURE_SETUP(TRAP)
638 {
639         struct sock_filter filter[] = {
640                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
641                         offsetof(struct seccomp_data, nr)),
642                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
643                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
644                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
645         };
646 
647         memset(&self->prog, 0, sizeof(self->prog));
648         self->prog.filter = malloc(sizeof(filter));
649         ASSERT_NE(NULL, self->prog.filter);
650         memcpy(self->prog.filter, filter, sizeof(filter));
651         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
652 }
653 
654 FIXTURE_TEARDOWN(TRAP)
655 {
656         if (self->prog.filter)
657                 free(self->prog.filter);
658 }
659 
660 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
661 {
662         long ret;
663 
664         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
665         ASSERT_EQ(0, ret);
666 
667         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
668         ASSERT_EQ(0, ret);
669         syscall(__NR_getpid);
670 }
671 
672 /* Ensure that SIGSYS overrides SIG_IGN */
673 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
674 {
675         long ret;
676 
677         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
678         ASSERT_EQ(0, ret);
679 
680         signal(SIGSYS, SIG_IGN);
681 
682         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
683         ASSERT_EQ(0, ret);
684         syscall(__NR_getpid);
685 }
686 
687 static siginfo_t TRAP_info;
688 static volatile int TRAP_nr;
689 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
690 {
691         memcpy(&TRAP_info, info, sizeof(TRAP_info));
692         TRAP_nr = nr;
693 }
694 
695 TEST_F(TRAP, handler)
696 {
697         int ret, test;
698         struct sigaction act;
699         sigset_t mask;
700 
701         memset(&act, 0, sizeof(act));
702         sigemptyset(&mask);
703         sigaddset(&mask, SIGSYS);
704 
705         act.sa_sigaction = &TRAP_action;
706         act.sa_flags = SA_SIGINFO;
707         ret = sigaction(SIGSYS, &act, NULL);
708         ASSERT_EQ(0, ret) {
709                 TH_LOG("sigaction failed");
710         }
711         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
712         ASSERT_EQ(0, ret) {
713                 TH_LOG("sigprocmask failed");
714         }
715 
716         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
717         ASSERT_EQ(0, ret);
718         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
719         ASSERT_EQ(0, ret);
720         TRAP_nr = 0;
721         memset(&TRAP_info, 0, sizeof(TRAP_info));
722         /* Expect the registers to be rolled back. (nr = error) may vary
723          * based on arch. */
724         ret = syscall(__NR_getpid);
725         /* Silence gcc warning about volatile. */
726         test = TRAP_nr;
727         EXPECT_EQ(SIGSYS, test);
728         struct local_sigsys {
729                 void *_call_addr;       /* calling user insn */
730                 int _syscall;           /* triggering system call number */
731                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
732         } *sigsys = (struct local_sigsys *)
733 #ifdef si_syscall
734                 &(TRAP_info.si_call_addr);
735 #else
736                 &TRAP_info.si_pid;
737 #endif
738         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
739         /* Make sure arch is non-zero. */
740         EXPECT_NE(0, sigsys->_arch);
741         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
742 }
743 
744 FIXTURE_DATA(precedence) {
745         struct sock_fprog allow;
746         struct sock_fprog trace;
747         struct sock_fprog error;
748         struct sock_fprog trap;
749         struct sock_fprog kill;
750 };
751 
752 FIXTURE_SETUP(precedence)
753 {
754         struct sock_filter allow_insns[] = {
755                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
756         };
757         struct sock_filter trace_insns[] = {
758                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
759                         offsetof(struct seccomp_data, nr)),
760                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
761                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
762                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
763         };
764         struct sock_filter error_insns[] = {
765                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
766                         offsetof(struct seccomp_data, nr)),
767                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
768                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
769                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
770         };
771         struct sock_filter trap_insns[] = {
772                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
773                         offsetof(struct seccomp_data, nr)),
774                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
775                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
776                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
777         };
778         struct sock_filter kill_insns[] = {
779                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
780                         offsetof(struct seccomp_data, nr)),
781                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
782                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
783                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
784         };
785 
786         memset(self, 0, sizeof(*self));
787 #define FILTER_ALLOC(_x) \
788         self->_x.filter = malloc(sizeof(_x##_insns)); \
789         ASSERT_NE(NULL, self->_x.filter); \
790         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
791         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
792         FILTER_ALLOC(allow);
793         FILTER_ALLOC(trace);
794         FILTER_ALLOC(error);
795         FILTER_ALLOC(trap);
796         FILTER_ALLOC(kill);
797 }
798 
799 FIXTURE_TEARDOWN(precedence)
800 {
801 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
802         FILTER_FREE(allow);
803         FILTER_FREE(trace);
804         FILTER_FREE(error);
805         FILTER_FREE(trap);
806         FILTER_FREE(kill);
807 }
808 
809 TEST_F(precedence, allow_ok)
810 {
811         pid_t parent, res = 0;
812         long ret;
813 
814         parent = getppid();
815         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
816         ASSERT_EQ(0, ret);
817 
818         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
819         ASSERT_EQ(0, ret);
820         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
821         ASSERT_EQ(0, ret);
822         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
823         ASSERT_EQ(0, ret);
824         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
825         ASSERT_EQ(0, ret);
826         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
827         ASSERT_EQ(0, ret);
828         /* Should work just fine. */
829         res = syscall(__NR_getppid);
830         EXPECT_EQ(parent, res);
831 }
832 
833 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
834 {
835         pid_t parent, res = 0;
836         long ret;
837 
838         parent = getppid();
839         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
840         ASSERT_EQ(0, ret);
841 
842         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
843         ASSERT_EQ(0, ret);
844         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
845         ASSERT_EQ(0, ret);
846         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
847         ASSERT_EQ(0, ret);
848         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
849         ASSERT_EQ(0, ret);
850         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
851         ASSERT_EQ(0, ret);
852         /* Should work just fine. */
853         res = syscall(__NR_getppid);
854         EXPECT_EQ(parent, res);
855         /* getpid() should never return. */
856         res = syscall(__NR_getpid);
857         EXPECT_EQ(0, res);
858 }
859 
860 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
861 {
862         pid_t parent;
863         long ret;
864 
865         parent = getppid();
866         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
867         ASSERT_EQ(0, ret);
868 
869         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
870         ASSERT_EQ(0, ret);
871         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
872         ASSERT_EQ(0, ret);
873         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
874         ASSERT_EQ(0, ret);
875         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
876         ASSERT_EQ(0, ret);
877         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
878         ASSERT_EQ(0, ret);
879         /* Should work just fine. */
880         EXPECT_EQ(parent, syscall(__NR_getppid));
881         /* getpid() should never return. */
882         EXPECT_EQ(0, syscall(__NR_getpid));
883 }
884 
885 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
886 {
887         pid_t parent;
888         long ret;
889 
890         parent = getppid();
891         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
892         ASSERT_EQ(0, ret);
893 
894         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
895         ASSERT_EQ(0, ret);
896         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
897         ASSERT_EQ(0, ret);
898         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
899         ASSERT_EQ(0, ret);
900         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
901         ASSERT_EQ(0, ret);
902         /* Should work just fine. */
903         EXPECT_EQ(parent, syscall(__NR_getppid));
904         /* getpid() should never return. */
905         EXPECT_EQ(0, syscall(__NR_getpid));
906 }
907 
908 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
909 {
910         pid_t parent;
911         long ret;
912 
913         parent = getppid();
914         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
915         ASSERT_EQ(0, ret);
916 
917         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
918         ASSERT_EQ(0, ret);
919         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
920         ASSERT_EQ(0, ret);
921         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
922         ASSERT_EQ(0, ret);
923         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
924         ASSERT_EQ(0, ret);
925         /* Should work just fine. */
926         EXPECT_EQ(parent, syscall(__NR_getppid));
927         /* getpid() should never return. */
928         EXPECT_EQ(0, syscall(__NR_getpid));
929 }
930 
931 TEST_F(precedence, errno_is_third)
932 {
933         pid_t parent;
934         long ret;
935 
936         parent = getppid();
937         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
938         ASSERT_EQ(0, ret);
939 
940         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
941         ASSERT_EQ(0, ret);
942         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
943         ASSERT_EQ(0, ret);
944         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
945         ASSERT_EQ(0, ret);
946         /* Should work just fine. */
947         EXPECT_EQ(parent, syscall(__NR_getppid));
948         EXPECT_EQ(0, syscall(__NR_getpid));
949 }
950 
951 TEST_F(precedence, errno_is_third_in_any_order)
952 {
953         pid_t parent;
954         long ret;
955 
956         parent = getppid();
957         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
958         ASSERT_EQ(0, ret);
959 
960         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
961         ASSERT_EQ(0, ret);
962         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
963         ASSERT_EQ(0, ret);
964         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
965         ASSERT_EQ(0, ret);
966         /* Should work just fine. */
967         EXPECT_EQ(parent, syscall(__NR_getppid));
968         EXPECT_EQ(0, syscall(__NR_getpid));
969 }
970 
971 TEST_F(precedence, trace_is_fourth)
972 {
973         pid_t parent;
974         long ret;
975 
976         parent = getppid();
977         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
978         ASSERT_EQ(0, ret);
979 
980         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
981         ASSERT_EQ(0, ret);
982         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
983         ASSERT_EQ(0, ret);
984         /* Should work just fine. */
985         EXPECT_EQ(parent, syscall(__NR_getppid));
986         /* No ptracer */
987         EXPECT_EQ(-1, syscall(__NR_getpid));
988 }
989 
990 TEST_F(precedence, trace_is_fourth_in_any_order)
991 {
992         pid_t parent;
993         long ret;
994 
995         parent = getppid();
996         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
997         ASSERT_EQ(0, ret);
998 
999         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1000         ASSERT_EQ(0, ret);
1001         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1002         ASSERT_EQ(0, ret);
1003         /* Should work just fine. */
1004         EXPECT_EQ(parent, syscall(__NR_getppid));
1005         /* No ptracer */
1006         EXPECT_EQ(-1, syscall(__NR_getpid));
1007 }
1008 
/* Fallback for headers that do not provide the seccomp ptrace option. */
#if !defined(PTRACE_O_TRACESECCOMP)
# define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/*
 * Catch the Ubuntu 12.04 value error: whenever the preprocessor does not
 * see PTRACE_EVENT_SECCOMP as 7 (including when it is not a macro at all),
 * drop whatever is there and define it as 7 ourselves.
 */
#if PTRACE_EVENT_SECCOMP != 7
# undef PTRACE_EVENT_SECCOMP
# define PTRACE_EVENT_SECCOMP	7
#endif
1021 
/*
 * True when a wait() status carries PTRACE_EVENT_SECCOMP in the bits above
 * bit 15. The argument is parenthesised so that expressions built with
 * low-precedence operators (?:, |, ...) are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) \
	(((status) >> 16) == PTRACE_EVENT_SECCOMP)

/*
 * Run flag of the tracer loop. It is cleared from a signal handler, so it
 * is a volatile sig_atomic_t: that is the only object type the C standard
 * lets a handler store to with a defined result.
 */
volatile sig_atomic_t tracer_running;

/* SIGUSR1 handler: ask the tracer loop to stop. @sig is unused. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = 0;
}
1028 
/*
 * Callback invoked by start_tracer() for each stop of the tracee.
 * @status is the raw wait() status of that stop; @args is the opaque
 * pointer that was handed to start_tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee,
			   int status, void *args);
1031 
1032 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1033             tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1034 {
1035         int ret = -1;
1036         struct sigaction action = {
1037                 .sa_handler = tracer_stop,
1038         };
1039 
1040         /* Allow external shutdown. */
1041         tracer_running = true;
1042         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1043 
1044         errno = 0;
1045         while (ret == -1 && errno != EINVAL)
1046                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1047         ASSERT_EQ(0, ret) {
1048                 kill(tracee, SIGKILL);
1049         }
1050         /* Wait for attach stop */
1051         wait(NULL);
1052 
1053         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1054                                                       PTRACE_O_TRACESYSGOOD :
1055                                                       PTRACE_O_TRACESECCOMP);
1056         ASSERT_EQ(0, ret) {
1057                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1058                 kill(tracee, SIGKILL);
1059         }
1060         ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1061                      tracee, NULL, 0);
1062         ASSERT_EQ(0, ret);
1063 
1064         /* Unblock the tracee */
1065         ASSERT_EQ(1, write(fd, "A", 1));
1066         ASSERT_EQ(0, close(fd));
1067 
1068         /* Run until we're shut down. Must assert to stop execution. */
1069         while (tracer_running) {
1070                 int status;
1071 
1072                 if (wait(&status) != tracee)
1073                         continue;
1074                 if (WIFSIGNALED(status) || WIFEXITED(status))
1075                         /* Child is dead. Time to go. */
1076                         return;
1077 
1078                 /* Check if this is a seccomp event. */
1079                 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1080 
1081                 tracer_func(_metadata, tracee, status, args);
1082 
1083                 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1084                              tracee, NULL, 0);
1085                 ASSERT_EQ(0, ret);
1086         }
1087         /* Directly report the status of our test harness results. */
1088         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1089 }
1090 
/* Common tracer setup/teardown functions. */

/* Signal handler that deliberately does nothing; @num is ignored. */
void cont_handler(int num)
{
	(void)num;
}
1094 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1095                           tracer_func_t func, void *args, bool ptrace_syscall)
1096 {
1097         char sync;
1098         int pipefd[2];
1099         pid_t tracer_pid;
1100         pid_t tracee = getpid();
1101 
1102         /* Setup a pipe for clean synchronization. */
1103         ASSERT_EQ(0, pipe(pipefd));
1104 
1105         /* Fork a child which we'll promote to tracer */
1106         tracer_pid = fork();
1107         ASSERT_LE(0, tracer_pid);
1108         signal(SIGALRM, cont_handler);
1109         if (tracer_pid == 0) {
1110                 close(pipefd[0]);
1111                 start_tracer(_metadata, pipefd[1], tracee, func, args,
1112                              ptrace_syscall);
1113                 syscall(__NR_exit, 0);
1114         }
1115         close(pipefd[1]);
1116         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1117         read(pipefd[0], &sync, 1);
1118         close(pipefd[0]);
1119 
1120         return tracer_pid;
1121 }
1122 void teardown_trace_fixture(struct __test_metadata *_metadata,
1123                             pid_t tracer)
1124 {
1125         if (tracer) {
1126                 int status;
1127                 /*
1128                  * Extract the exit code from the other process and
1129                  * adopt it for ourselves in case its asserts failed.
1130                  */
1131                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1132                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1133                 if (WEXITSTATUS(status))
1134                         _metadata->passed = 0;
1135         }
1136 }
1137 
/*
 * "poke" tracer arguments and function.
 *
 * Argument block handed to tracer_poke() through its void *args parameter.
 */
struct tracer_args_poke_t {
	/* Address in the tracee that tracer_poke() writes 0x1001 to. */
	unsigned long poke_addr;
};
1142 
1143 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1144                  void *args)
1145 {
1146         int ret;
1147         unsigned long msg;
1148         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1149 
1150         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1151         EXPECT_EQ(0, ret);
1152         /* If this fails, don't try to recover. */
1153         ASSERT_EQ(0x1001, msg) {
1154                 kill(tracee, SIGKILL);
1155         }
1156         /*
1157          * Poke in the message.
1158          * Registers are not touched to try to keep this relatively arch
1159          * agnostic.
1160          */
1161         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1162         EXPECT_EQ(0, ret);
1163 }
1164 
1165 FIXTURE_DATA(TRACE_poke) {
1166         struct sock_fprog prog;
1167         pid_t tracer;
1168         long poked;
1169         struct tracer_args_poke_t tracer_args;
1170 };
1171 
1172 FIXTURE_SETUP(TRACE_poke)
1173 {
1174         struct sock_filter filter[] = {
1175                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1176                         offsetof(struct seccomp_data, nr)),
1177                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1178                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1179                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1180         };
1181 
1182         self->poked = 0;
1183         memset(&self->prog, 0, sizeof(self->prog));
1184         self->prog.filter = malloc(sizeof(filter));
1185         ASSERT_NE(NULL, self->prog.filter);
1186         memcpy(self->prog.filter, filter, sizeof(filter));
1187         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1188 
1189         /* Set up tracer args. */
1190         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1191 
1192         /* Launch tracer. */
1193         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1194                                            &self->tracer_args, false);
1195 }
1196 
1197 FIXTURE_TEARDOWN(TRACE_poke)
1198 {
1199         teardown_trace_fixture(_metadata, self->tracer);
1200         if (self->prog.filter)
1201                 free(self->prog.filter);
1202 }
1203 
1204 TEST_F(TRACE_poke, read_has_side_effects)
1205 {
1206         ssize_t ret;
1207 
1208         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1209         ASSERT_EQ(0, ret);
1210 
1211         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1212         ASSERT_EQ(0, ret);
1213 
1214         EXPECT_EQ(0, self->poked);
1215         ret = read(-1, NULL, 0);
1216         EXPECT_EQ(-1, ret);
1217         EXPECT_EQ(0x1001, self->poked);
1218 }
1219 
1220 TEST_F(TRACE_poke, getpid_runs_normally)
1221 {
1222         long ret;
1223 
1224         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1225         ASSERT_EQ(0, ret);
1226 
1227         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1228         ASSERT_EQ(0, ret);
1229 
1230         EXPECT_EQ(0, self->poked);
1231         EXPECT_NE(0, syscall(__NR_getpid));
1232         EXPECT_EQ(0, self->poked);
1233 }
1234 
/*
 * Per-architecture view of the tracee registers:
 *   ARCH_REGS    type of the register block read from / written to the tracee
 *   SYSCALL_NUM  member holding the syscall number
 *   SYSCALL_RET  member holding the syscall return value
 *
 * SYSCALL_NUM_RET_SHARE_REG is defined when SYSCALL_NUM and SYSCALL_RET
 * name the same register. On such architectures change_syscall() must not
 * store a return value, because that would overwrite the syscall number it
 * has just set (turning a skip, -1, into syscall 1).
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
/* gprs[2] carries both the syscall number and the return value. */
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif

/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif
1279 
1280 /* Architecture-specific syscall fetching routine. */
1281 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1282 {
1283         ARCH_REGS regs;
1284 #ifdef HAVE_GETREGS
1285         EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1286                 TH_LOG("PTRACE_GETREGS failed");
1287                 return -1;
1288         }
1289 #else
1290         struct iovec iov;
1291 
1292         iov.iov_base = &regs;
1293         iov.iov_len = sizeof(regs);
1294         EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1295                 TH_LOG("PTRACE_GETREGSET failed");
1296                 return -1;
1297         }
1298 #endif
1299 
1300 #if defined(__mips__)
1301         if (regs.SYSCALL_NUM == __NR_O32_Linux)
1302                 return regs.SYSCALL_SYSCALL_NUM;
1303 #endif
1304         return regs.SYSCALL_NUM;
1305 }
1306 
1307 /* Architecture-specific syscall changing routine. */
1308 void change_syscall(struct __test_metadata *_metadata,
1309                     pid_t tracee, int syscall)
1310 {
1311         int ret;
1312         ARCH_REGS regs;
1313 #ifdef HAVE_GETREGS
1314         ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1315 #else
1316         struct iovec iov;
1317         iov.iov_base = &regs;
1318         iov.iov_len = sizeof(regs);
1319         ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1320 #endif
1321         EXPECT_EQ(0, ret) {}
1322 
1323 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1324     defined(__s390__) || defined(__hppa__)
1325         {
1326                 regs.SYSCALL_NUM = syscall;
1327         }
1328 #elif defined(__mips__)
1329         {
1330                 if (regs.SYSCALL_NUM == __NR_O32_Linux)
1331                         regs.SYSCALL_SYSCALL_NUM = syscall;
1332                 else
1333                         regs.SYSCALL_NUM = syscall;
1334         }
1335 
1336 #elif defined(__arm__)
1337 # ifndef PTRACE_SET_SYSCALL
1338 #  define PTRACE_SET_SYSCALL   23
1339 # endif
1340         {
1341                 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1342                 EXPECT_EQ(0, ret);
1343         }
1344 
1345 #elif defined(__aarch64__)
1346 # ifndef NT_ARM_SYSTEM_CALL
1347 #  define NT_ARM_SYSTEM_CALL 0x404
1348 # endif
1349         {
1350                 iov.iov_base = &syscall;
1351                 iov.iov_len = sizeof(syscall);
1352                 ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1353                              &iov);
1354                 EXPECT_EQ(0, ret);
1355         }
1356 
1357 #else
1358         ASSERT_EQ(1, 0) {
1359                 TH_LOG("How is the syscall changed on this architecture?");
1360         }
1361 #endif
1362 
1363         /* If syscall is skipped, change return value. */
1364         if (syscall == -1)
1365 #ifdef SYSCALL_NUM_RET_SHARE_REG
1366                 TH_LOG("Can't modify syscall return on this architecture");
1367 #else
1368                 regs.SYSCALL_RET = 1;
1369 #endif
1370 
1371 #ifdef HAVE_GETREGS
1372         ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1373 #else
1374         iov.iov_base = &regs;
1375         iov.iov_len = sizeof(regs);
1376         ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1377 #endif
1378         EXPECT_EQ(0, ret);
1379 }
1380 
1381 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1382                     int status, void *args)
1383 {
1384         int ret;
1385         unsigned long msg;
1386 
1387         /* Make sure we got the right message. */
1388         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1389         EXPECT_EQ(0, ret);
1390 
1391         /* Validate and take action on expected syscalls. */
1392         switch (msg) {
1393         case 0x1002:
1394                 /* change getpid to getppid. */
1395                 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1396                 change_syscall(_metadata, tracee, __NR_getppid);
1397                 break;
1398         case 0x1003:
1399                 /* skip gettid. */
1400                 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1401                 change_syscall(_metadata, tracee, -1);
1402                 break;
1403         case 0x1004:
1404                 /* do nothing (allow getppid) */
1405                 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1406                 break;
1407         default:
1408                 EXPECT_EQ(0, msg) {
1409                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1410                         kill(tracee, SIGKILL);
1411                 }
1412         }
1413 
1414 }
1415 
1416 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1417                    int status, void *args)
1418 {
1419         int ret, nr;
1420         unsigned long msg;
1421         static bool entry;
1422 
1423         /* Make sure we got an empty message. */
1424         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1425         EXPECT_EQ(0, ret);
1426         EXPECT_EQ(0, msg);
1427 
1428         /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
1429         entry = !entry;
1430         if (!entry)
1431                 return;
1432 
1433         nr = get_syscall(_metadata, tracee);
1434 
1435         if (nr == __NR_getpid)
1436                 change_syscall(_metadata, tracee, __NR_getppid);
1437 }
1438 
1439 FIXTURE_DATA(TRACE_syscall) {
1440         struct sock_fprog prog;
1441         pid_t tracer, mytid, mypid, parent;
1442 };
1443 
1444 FIXTURE_SETUP(TRACE_syscall)
1445 {
1446         struct sock_filter filter[] = {
1447                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1448                         offsetof(struct seccomp_data, nr)),
1449                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1450                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1451                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1452                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1453                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1454                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1455                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1456         };
1457 
1458         memset(&self->prog, 0, sizeof(self->prog));
1459         self->prog.filter = malloc(sizeof(filter));
1460         ASSERT_NE(NULL, self->prog.filter);
1461         memcpy(self->prog.filter, filter, sizeof(filter));
1462         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1463 
1464         /* Prepare some testable syscall results. */
1465         self->mytid = syscall(__NR_gettid);
1466         ASSERT_GT(self->mytid, 0);
1467         ASSERT_NE(self->mytid, 1) {
1468                 TH_LOG("Running this test as init is not supported. :)");
1469         }
1470 
1471         self->mypid = getpid();
1472         ASSERT_GT(self->mypid, 0);
1473         ASSERT_EQ(self->mytid, self->mypid);
1474 
1475         self->parent = getppid();
1476         ASSERT_GT(self->parent, 0);
1477         ASSERT_NE(self->parent, self->mypid);
1478 
1479         /* Launch tracer. */
1480         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1481                                            false);
1482 }
1483 
1484 FIXTURE_TEARDOWN(TRACE_syscall)
1485 {
1486         teardown_trace_fixture(_metadata, self->tracer);
1487         if (self->prog.filter)
1488                 free(self->prog.filter);
1489 }
1490 
1491 TEST_F(TRACE_syscall, syscall_allowed)
1492 {
1493         long ret;
1494 
1495         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1496         ASSERT_EQ(0, ret);
1497 
1498         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1499         ASSERT_EQ(0, ret);
1500 
1501         /* getppid works as expected (no changes). */
1502         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1503         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1504 }
1505 
1506 TEST_F(TRACE_syscall, syscall_redirected)
1507 {
1508         long ret;
1509 
1510         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1511         ASSERT_EQ(0, ret);
1512 
1513         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1514         ASSERT_EQ(0, ret);
1515 
1516         /* getpid has been redirected to getppid as expected. */
1517         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1518         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1519 }
1520 
1521 TEST_F(TRACE_syscall, syscall_dropped)
1522 {
1523         long ret;
1524 
1525         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1526         ASSERT_EQ(0, ret);
1527 
1528         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1529         ASSERT_EQ(0, ret);
1530 
1531 #ifdef SYSCALL_NUM_RET_SHARE_REG
1532         /* gettid has been skipped */
1533         EXPECT_EQ(-1, syscall(__NR_gettid));
1534 #else
1535         /* gettid has been skipped and an altered return value stored. */
1536         EXPECT_EQ(1, syscall(__NR_gettid));
1537 #endif
1538         EXPECT_NE(self->mytid, syscall(__NR_gettid));
1539 }
1540 
1541 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1542 {
1543         struct sock_filter filter[] = {
1544                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1545                         offsetof(struct seccomp_data, nr)),
1546                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1547                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1548                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1549         };
1550         struct sock_fprog prog = {
1551                 .len = (unsigned short)ARRAY_SIZE(filter),
1552                 .filter = filter,
1553         };
1554         long ret;
1555 
1556         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1557         ASSERT_EQ(0, ret);
1558 
1559         /* Install fixture filter. */
1560         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1561         ASSERT_EQ(0, ret);
1562 
1563         /* Install "errno on getppid" filter. */
1564         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1565         ASSERT_EQ(0, ret);
1566 
1567         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1568         EXPECT_EQ(-1, syscall(__NR_getpid));
1569         EXPECT_EQ(EPERM, errno);
1570 }
1571 
1572 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1573 {
1574         struct sock_filter filter[] = {
1575                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1576                         offsetof(struct seccomp_data, nr)),
1577                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1578                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1579                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1580         };
1581         struct sock_fprog prog = {
1582                 .len = (unsigned short)ARRAY_SIZE(filter),
1583                 .filter = filter,
1584         };
1585         long ret;
1586 
1587         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1588         ASSERT_EQ(0, ret);
1589 
1590         /* Install fixture filter. */
1591         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1592         ASSERT_EQ(0, ret);
1593 
1594         /* Install "death on getppid" filter. */
1595         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1596         ASSERT_EQ(0, ret);
1597 
1598         /* Tracer will redirect getpid to getppid, and we should die. */
1599         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1600 }
1601 
1602 TEST_F(TRACE_syscall, skip_after_ptrace)
1603 {
1604         struct sock_filter filter[] = {
1605                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1606                         offsetof(struct seccomp_data, nr)),
1607                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1608                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1609                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1610         };
1611         struct sock_fprog prog = {
1612                 .len = (unsigned short)ARRAY_SIZE(filter),
1613                 .filter = filter,
1614         };
1615         long ret;
1616 
1617         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1618         teardown_trace_fixture(_metadata, self->tracer);
1619         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1620                                            true);
1621 
1622         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1623         ASSERT_EQ(0, ret);
1624 
1625         /* Install "errno on getppid" filter. */
1626         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1627         ASSERT_EQ(0, ret);
1628 
1629         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1630         EXPECT_EQ(-1, syscall(__NR_getpid));
1631         EXPECT_EQ(EPERM, errno);
1632 }
1633 
1634 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
1635 {
1636         struct sock_filter filter[] = {
1637                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1638                         offsetof(struct seccomp_data, nr)),
1639                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1640                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1641                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1642         };
1643         struct sock_fprog prog = {
1644                 .len = (unsigned short)ARRAY_SIZE(filter),
1645                 .filter = filter,
1646         };
1647         long ret;
1648 
1649         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1650         teardown_trace_fixture(_metadata, self->tracer);
1651         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1652                                            true);
1653 
1654         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1655         ASSERT_EQ(0, ret);
1656 
1657         /* Install "death on getppid" filter. */
1658         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1659         ASSERT_EQ(0, ret);
1660 
1661         /* Tracer will redirect getpid to getppid, and we should die. */
1662         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1663 }
1664 
/*
 * Fallback seccomp syscall numbers for toolchains whose headers do not
 * define __NR_seccomp. Unknown architectures get a build warning and the
 * placeholder number 0xffff.
 */
#if !defined(__NR_seccomp)
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif /* !defined(__NR_seccomp) */
1685 
/*
 * Fallbacks for the seccomp() operation and flag constants, used only when
 * the included headers do not define them.
 */
#if !defined(SECCOMP_SET_MODE_STRICT)
# define SECCOMP_SET_MODE_STRICT 0
#endif

#if !defined(SECCOMP_SET_MODE_FILTER)
# define SECCOMP_SET_MODE_FILTER 1
#endif

#if !defined(SECCOMP_FILTER_FLAG_TSYNC)
# define SECCOMP_FILTER_FLAG_TSYNC 1
#endif
1697 
#ifndef seccomp
/*
 * Thin wrapper around the raw seccomp syscall.
 *
 * errno is cleared before the call, so callers can compare errno against 0
 * afterwards. Returns whatever syscall() returns, narrowed to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);
	return rc;
}
#endif
1705 
1706 TEST(seccomp_syscall)
1707 {
1708         struct sock_filter filter[] = {
1709                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1710         };
1711         struct sock_fprog prog = {
1712                 .len = (unsigned short)ARRAY_SIZE(filter),
1713                 .filter = filter,
1714         };
1715         long ret;
1716 
1717         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1718         ASSERT_EQ(0, ret) {
1719                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1720         }
1721 
1722         /* Reject insane operation. */
1723         ret = seccomp(-1, 0, &prog);
1724         ASSERT_NE(ENOSYS, errno) {
1725                 TH_LOG("Kernel does not support seccomp syscall!");
1726         }
1727         EXPECT_EQ(EINVAL, errno) {
1728                 TH_LOG("Did not reject crazy op value!");
1729         }
1730 
1731         /* Reject strict with flags or pointer. */
1732         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1733         EXPECT_EQ(EINVAL, errno) {
1734                 TH_LOG("Did not reject mode strict with flags!");
1735         }
1736         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1737         EXPECT_EQ(EINVAL, errno) {
1738                 TH_LOG("Did not reject mode strict with uargs!");
1739         }
1740 
1741         /* Reject insane args for filter. */
1742         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1743         EXPECT_EQ(EINVAL, errno) {
1744                 TH_LOG("Did not reject crazy filter flags!");
1745         }
1746         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1747         EXPECT_EQ(EFAULT, errno) {
1748                 TH_LOG("Did not reject NULL filter!");
1749         }
1750 
1751         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1752         EXPECT_EQ(0, errno) {
1753                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1754                         strerror(errno));
1755         }
1756 }
1757 
1758 TEST(seccomp_syscall_mode_lock)
1759 {
1760         struct sock_filter filter[] = {
1761                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1762         };
1763         struct sock_fprog prog = {
1764                 .len = (unsigned short)ARRAY_SIZE(filter),
1765                 .filter = filter,
1766         };
1767         long ret;
1768 
1769         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1770         ASSERT_EQ(0, ret) {
1771                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1772         }
1773 
1774         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1775         ASSERT_NE(ENOSYS, errno) {
1776                 TH_LOG("Kernel does not support seccomp syscall!");
1777         }
1778         EXPECT_EQ(0, ret) {
1779                 TH_LOG("Could not install filter!");
1780         }
1781 
1782         /* Make sure neither entry point will switch to strict. */
1783         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1784         EXPECT_EQ(EINVAL, errno) {
1785                 TH_LOG("Switched to mode strict!");
1786         }
1787 
1788         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1789         EXPECT_EQ(EINVAL, errno) {
1790                 TH_LOG("Switched to mode strict!");
1791         }
1792 }
1793 
1794 TEST(TSYNC_first)
1795 {
1796         struct sock_filter filter[] = {
1797                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1798         };
1799         struct sock_fprog prog = {
1800                 .len = (unsigned short)ARRAY_SIZE(filter),
1801                 .filter = filter,
1802         };
1803         long ret;
1804 
1805         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1806         ASSERT_EQ(0, ret) {
1807                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1808         }
1809 
1810         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
1811                       &prog);
1812         ASSERT_NE(ENOSYS, errno) {
1813                 TH_LOG("Kernel does not support seccomp syscall!");
1814         }
1815         EXPECT_EQ(0, ret) {
1816                 TH_LOG("Could not install initial filter with TSYNC!");
1817         }
1818 }
1819 
/* Number of sibling threads tracked by the TSYNC fixture. */
#define TSYNC_SIBLINGS 2

/* State shared between a TSYNC test and one of its sibling threads. */
struct tsync_sibling {
	/* Handle filled in by pthread_create(); 0 means "nothing to join/kill". */
	pthread_t tid;
	/* Kernel thread id, recorded by the sibling itself via gettid. */
	pid_t system_tid;
	/* Posted by the sibling once it has finished its start-up step. */
	sem_t *started;
	/* Condition variable (and its mutex) the sibling blocks on. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the sibling installs @prog itself before waiting. */
	int diverge;
	/* How many condition wake-ups the sibling consumes before continuing. */
	int num_waits;
	/* Filter program the sibling installs when @diverge is set. */
	struct sock_fprog *prog;
	/* Test harness metadata of the owning test. */
	struct __test_metadata *metadata;
};
1832 
/*
 * To avoid joining already-joined threads (which is not allowed by Bionic),
 * join the thread and, only when that succeeds, clear the tid so that the
 * fixture teardown skips it. Any thread whose tid is still set is killed
 * directly during teardown.
 *
 * @tid:    pthread_t lvalue; evaluated more than once and set to 0 after a
 *          successful join.
 * @status: passed through unchanged to pthread_join().
 *
 * The log message carries no trailing newline, like every other TH_LOG()
 * call in this file.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _rc = pthread_join((tid), (status));		\
		if (_rc) {						\
			TH_LOG("pthread_join of tid %u failed: %d",	\
				(unsigned int)(tid), _rc);		\
		} else {						\
			(tid) = 0;					\
		}							\
	} while (0)
1849 
1850 FIXTURE_DATA(TSYNC) {
1851         struct sock_fprog root_prog, apply_prog;
1852         struct tsync_sibling sibling[TSYNC_SIBLINGS];
1853         sem_t started;
1854         pthread_cond_t cond;
1855         pthread_mutex_t mutex;
1856         int sibling_count;
1857 };
1858 
1859 FIXTURE_SETUP(TSYNC)
1860 {
1861         struct sock_filter root_filter[] = {
1862                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1863         };
1864         struct sock_filter apply_filter[] = {
1865                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1866                         offsetof(struct seccomp_data, nr)),
1867                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1868                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1869                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1870         };
1871 
1872         memset(&self->root_prog, 0, sizeof(self->root_prog));
1873         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1874         memset(&self->sibling, 0, sizeof(self->sibling));
1875         self->root_prog.filter = malloc(sizeof(root_filter));
1876         ASSERT_NE(NULL, self->root_prog.filter);
1877         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1878         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1879 
1880         self->apply_prog.filter = malloc(sizeof(apply_filter));
1881         ASSERT_NE(NULL, self->apply_prog.filter);
1882         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1883         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1884 
1885         self->sibling_count = 0;
1886         pthread_mutex_init(&self->mutex, NULL);
1887         pthread_cond_init(&self->cond, NULL);
1888         sem_init(&self->started, 0, 0);
1889         self->sibling[0].tid = 0;
1890         self->sibling[0].cond = &self->cond;
1891         self->sibling[0].started = &self->started;
1892         self->sibling[0].mutex = &self->mutex;
1893         self->sibling[0].diverge = 0;
1894         self->sibling[0].num_waits = 1;
1895         self->sibling[0].prog = &self->root_prog;
1896         self->sibling[0].metadata = _metadata;
1897         self->sibling[1].tid = 0;
1898         self->sibling[1].cond = &self->cond;
1899         self->sibling[1].started = &self->started;
1900         self->sibling[1].mutex = &self->mutex;
1901         self->sibling[1].diverge = 0;
1902         self->sibling[1].prog = &self->root_prog;
1903         self->sibling[1].num_waits = 1;
1904         self->sibling[1].metadata = _metadata;
1905 }
1906 
1907 FIXTURE_TEARDOWN(TSYNC)
1908 {
1909         int sib = 0;
1910 
1911         if (self->root_prog.filter)
1912                 free(self->root_prog.filter);
1913         if (self->apply_prog.filter)
1914                 free(self->apply_prog.filter);
1915 
1916         for ( ; sib < self->sibling_count; ++sib) {
1917                 struct tsync_sibling *s = &self->sibling[sib];
1918 
1919                 if (!s->tid)
1920                         continue;
1921                 /*
1922                  * If a thread is still running, it may be stuck, so hit
1923                  * it over the head really hard.
1924                  */
1925                 pthread_kill(s->tid, 9);
1926         }
1927         pthread_mutex_destroy(&self->mutex);
1928         pthread_cond_destroy(&self->cond);
1929         sem_destroy(&self->started);
1930 }
1931 
1932 void *tsync_sibling(void *data)
1933 {
1934         long ret = 0;
1935         struct tsync_sibling *me = data;
1936 
1937         me->system_tid = syscall(__NR_gettid);
1938 
1939         pthread_mutex_lock(me->mutex);
1940         if (me->diverge) {
1941                 /* Just re-apply the root prog to fork the tree */
1942                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
1943                                 me->prog, 0, 0);
1944         }
1945         sem_post(me->started);
1946         /* Return outside of started so parent notices failures. */
1947         if (ret) {
1948                 pthread_mutex_unlock(me->mutex);
1949                 return (void *)SIBLING_EXIT_FAILURE;
1950         }
1951         do {
1952                 pthread_cond_wait(me->cond, me->mutex);
1953                 me->num_waits = me->num_waits - 1;
1954         } while (me->num_waits);
1955         pthread_mutex_unlock(me->mutex);
1956 
1957         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1958         if (!ret)
1959                 return (void *)SIBLING_EXIT_NEWPRIVS;
1960         read(0, NULL, 0);
1961         return (void *)SIBLING_EXIT_UNKILLED;
1962 }
1963 
1964 void tsync_start_sibling(struct tsync_sibling *sibling)
1965 {
1966         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
1967 }
1968 
1969 TEST_F(TSYNC, siblings_fail_prctl)
1970 {
1971         long ret;
1972         void *status;
1973         struct sock_filter filter[] = {
1974                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1975                         offsetof(struct seccomp_data, nr)),
1976                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
1977                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
1978                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1979         };
1980         struct sock_fprog prog = {
1981                 .len = (unsigned short)ARRAY_SIZE(filter),
1982                 .filter = filter,
1983         };
1984 
1985         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1986                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1987         }
1988 
1989         /* Check prctl failure detection by requesting sib 0 diverge. */
1990         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1991         ASSERT_NE(ENOSYS, errno) {
1992                 TH_LOG("Kernel does not support seccomp syscall!");
1993         }
1994         ASSERT_EQ(0, ret) {
1995                 TH_LOG("setting filter failed");
1996         }
1997 
1998         self->sibling[0].diverge = 1;
1999         tsync_start_sibling(&self->sibling[0]);
2000         tsync_start_sibling(&self->sibling[1]);
2001 
2002         while (self->sibling_count < TSYNC_SIBLINGS) {
2003                 sem_wait(&self->started);
2004                 self->sibling_count++;
2005         }
2006 
2007         /* Signal the threads to clean up*/
2008         pthread_mutex_lock(&self->mutex);
2009         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2010                 TH_LOG("cond broadcast non-zero");
2011         }
2012         pthread_mutex_unlock(&self->mutex);
2013 
2014         /* Ensure diverging sibling failed to call prctl. */
2015         PTHREAD_JOIN(self->sibling[0].tid, &status);
2016         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2017         PTHREAD_JOIN(self->sibling[1].tid, &status);
2018         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2019 }
2020 
2021 TEST_F(TSYNC, two_siblings_with_ancestor)
2022 {
2023         long ret;
2024         void *status;
2025 
2026         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2027                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2028         }
2029 
2030         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2031         ASSERT_NE(ENOSYS, errno) {
2032                 TH_LOG("Kernel does not support seccomp syscall!");
2033         }
2034         ASSERT_EQ(0, ret) {
2035                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2036         }
2037         tsync_start_sibling(&self->sibling[0]);
2038         tsync_start_sibling(&self->sibling[1]);
2039 
2040         while (self->sibling_count < TSYNC_SIBLINGS) {
2041                 sem_wait(&self->started);
2042                 self->sibling_count++;
2043         }
2044 
2045         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2046                       &self->apply_prog);
2047         ASSERT_EQ(0, ret) {
2048                 TH_LOG("Could install filter on all threads!");
2049         }
2050         /* Tell the siblings to test the policy */
2051         pthread_mutex_lock(&self->mutex);
2052         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2053                 TH_LOG("cond broadcast non-zero");
2054         }
2055         pthread_mutex_unlock(&self->mutex);
2056         /* Ensure they are both killed and don't exit cleanly. */
2057         PTHREAD_JOIN(self->sibling[0].tid, &status);
2058         EXPECT_EQ(0x0, (long)status);
2059         PTHREAD_JOIN(self->sibling[1].tid, &status);
2060         EXPECT_EQ(0x0, (long)status);
2061 }
2062 
2063 TEST_F(TSYNC, two_sibling_want_nnp)
2064 {
2065         void *status;
2066 
2067         /* start siblings before any prctl() operations */
2068         tsync_start_sibling(&self->sibling[0]);
2069         tsync_start_sibling(&self->sibling[1]);
2070         while (self->sibling_count < TSYNC_SIBLINGS) {
2071                 sem_wait(&self->started);
2072                 self->sibling_count++;
2073         }
2074 
2075         /* Tell the siblings to test no policy */
2076         pthread_mutex_lock(&self->mutex);
2077         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2078                 TH_LOG("cond broadcast non-zero");
2079         }
2080         pthread_mutex_unlock(&self->mutex);
2081 
2082         /* Ensure they are both upset about lacking nnp. */
2083         PTHREAD_JOIN(self->sibling[0].tid, &status);
2084         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2085         PTHREAD_JOIN(self->sibling[1].tid, &status);
2086         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2087 }
2088 
2089 TEST_F(TSYNC, two_siblings_with_no_filter)
2090 {
2091         long ret;
2092         void *status;
2093 
2094         /* start siblings before any prctl() operations */
2095         tsync_start_sibling(&self->sibling[0]);
2096         tsync_start_sibling(&self->sibling[1]);
2097         while (self->sibling_count < TSYNC_SIBLINGS) {
2098                 sem_wait(&self->started);
2099                 self->sibling_count++;
2100         }
2101 
2102         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2103                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2104         }
2105 
2106         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2107                       &self->apply_prog);
2108         ASSERT_NE(ENOSYS, errno) {
2109                 TH_LOG("Kernel does not support seccomp syscall!");
2110         }
2111         ASSERT_EQ(0, ret) {
2112                 TH_LOG("Could install filter on all threads!");
2113         }
2114 
2115         /* Tell the siblings to test the policy */
2116         pthread_mutex_lock(&self->mutex);
2117         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2118                 TH_LOG("cond broadcast non-zero");
2119         }
2120         pthread_mutex_unlock(&self->mutex);
2121 
2122         /* Ensure they are both killed and don't exit cleanly. */
2123         PTHREAD_JOIN(self->sibling[0].tid, &status);
2124         EXPECT_EQ(0x0, (long)status);
2125         PTHREAD_JOIN(self->sibling[1].tid, &status);
2126         EXPECT_EQ(0x0, (long)status);
2127 }
2128 
2129 TEST_F(TSYNC, two_siblings_with_one_divergence)
2130 {
2131         long ret;
2132         void *status;
2133 
2134         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2135                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2136         }
2137 
2138         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2139         ASSERT_NE(ENOSYS, errno) {
2140                 TH_LOG("Kernel does not support seccomp syscall!");
2141         }
2142         ASSERT_EQ(0, ret) {
2143                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2144         }
2145         self->sibling[0].diverge = 1;
2146         tsync_start_sibling(&self->sibling[0]);
2147         tsync_start_sibling(&self->sibling[1]);
2148 
2149         while (self->sibling_count < TSYNC_SIBLINGS) {
2150                 sem_wait(&self->started);
2151                 self->sibling_count++;
2152         }
2153 
2154         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2155                       &self->apply_prog);
2156         ASSERT_EQ(self->sibling[0].system_tid, ret) {
2157                 TH_LOG("Did not fail on diverged sibling.");
2158         }
2159 
2160         /* Wake the threads */
2161         pthread_mutex_lock(&self->mutex);
2162         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2163                 TH_LOG("cond broadcast non-zero");
2164         }
2165         pthread_mutex_unlock(&self->mutex);
2166 
2167         /* Ensure they are both unkilled. */
2168         PTHREAD_JOIN(self->sibling[0].tid, &status);
2169         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2170         PTHREAD_JOIN(self->sibling[1].tid, &status);
2171         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2172 }
2173 
2174 TEST_F(TSYNC, two_siblings_not_under_filter)
2175 {
2176         long ret, sib;
2177         void *status;
2178 
2179         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2180                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2181         }
2182 
2183         /*
2184          * Sibling 0 will have its own seccomp policy
2185          * and Sibling 1 will not be under seccomp at
2186          * all. Sibling 1 will enter seccomp and 0
2187          * will cause failure.
2188          */
2189         self->sibling[0].diverge = 1;
2190         tsync_start_sibling(&self->sibling[0]);
2191         tsync_start_sibling(&self->sibling[1]);
2192 
2193         while (self->sibling_count < TSYNC_SIBLINGS) {
2194                 sem_wait(&self->started);
2195                 self->sibling_count++;
2196         }
2197 
2198         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2199         ASSERT_NE(ENOSYS, errno) {
2200                 TH_LOG("Kernel does not support seccomp syscall!");
2201         }
2202         ASSERT_EQ(0, ret) {
2203                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2204         }
2205 
2206         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2207                       &self->apply_prog);
2208         ASSERT_EQ(ret, self->sibling[0].system_tid) {
2209                 TH_LOG("Did not fail on diverged sibling.");
2210         }
2211         sib = 1;
2212         if (ret == self->sibling[0].system_tid)
2213                 sib = 0;
2214 
2215         pthread_mutex_lock(&self->mutex);
2216 
2217         /* Increment the other siblings num_waits so we can clean up
2218          * the one we just saw.
2219          */
2220         self->sibling[!sib].num_waits += 1;
2221 
2222         /* Signal the thread to clean up*/
2223         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2224                 TH_LOG("cond broadcast non-zero");
2225         }
2226         pthread_mutex_unlock(&self->mutex);
2227         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2228         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2229         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2230         while (!kill(self->sibling[sib].system_tid, 0))
2231                 sleep(0.1);
2232         /* Switch to the remaining sibling */
2233         sib = !sib;
2234 
2235         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2236                       &self->apply_prog);
2237         ASSERT_EQ(0, ret) {
2238                 TH_LOG("Expected the remaining sibling to sync");
2239         };
2240 
2241         pthread_mutex_lock(&self->mutex);
2242 
2243         /* If remaining sibling didn't have a chance to wake up during
2244          * the first broadcast, manually reduce the num_waits now.
2245          */
2246         if (self->sibling[sib].num_waits > 1)
2247                 self->sibling[sib].num_waits = 1;
2248         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2249                 TH_LOG("cond broadcast non-zero");
2250         }
2251         pthread_mutex_unlock(&self->mutex);
2252         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2253         EXPECT_EQ(0, (long)status);
2254         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2255         while (!kill(self->sibling[sib].system_tid, 0))
2256                 sleep(0.1);
2257 
2258         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2259                       &self->apply_prog);
2260         ASSERT_EQ(0, ret);  /* just us chickens */
2261 }
2262 
2263 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
2264 TEST(syscall_restart)
2265 {
2266         long ret;
2267         unsigned long msg;
2268         pid_t child_pid;
2269         int pipefd[2];
2270         int status;
2271         siginfo_t info = { };
2272         struct sock_filter filter[] = {
2273                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2274                          offsetof(struct seccomp_data, nr)),
2275 
2276 #ifdef __NR_sigreturn
2277                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2278 #endif
2279                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2280                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2281                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2282                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2283                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2284 
2285                 /* Allow __NR_write for easy logging. */
2286                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2287                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2288                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2289                 /* The nanosleep jump target. */
2290                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2291                 /* The restart_syscall jump target. */
2292                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2293         };
2294         struct sock_fprog prog = {
2295                 .len = (unsigned short)ARRAY_SIZE(filter),
2296                 .filter = filter,
2297         };
2298 #if defined(__arm__)
2299         struct utsname utsbuf;
2300 #endif
2301 
2302         ASSERT_EQ(0, pipe(pipefd));
2303 
2304         child_pid = fork();
2305         ASSERT_LE(0, child_pid);
2306         if (child_pid == 0) {
2307                 /* Child uses EXPECT not ASSERT to deliver status correctly. */
2308                 char buf = ' ';
2309                 struct timespec timeout = { };
2310 
2311                 /* Attach parent as tracer and stop. */
2312                 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2313                 EXPECT_EQ(0, raise(SIGSTOP));
2314 
2315                 EXPECT_EQ(0, close(pipefd[1]));
2316 
2317                 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2318                         TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2319                 }
2320 
2321                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2322                 EXPECT_EQ(0, ret) {
2323                         TH_LOG("Failed to install filter!");
2324                 }
2325 
2326                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2327                         TH_LOG("Failed to read() sync from parent");
2328                 }
2329                 EXPECT_EQ('.', buf) {
2330                         TH_LOG("Failed to get sync data from read()");
2331                 }
2332 
2333                 /* Start nanosleep to be interrupted. */
2334                 timeout.tv_sec = 1;
2335                 errno = 0;
2336                 EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2337                         TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2338                 }
2339 
2340                 /* Read final sync from parent. */
2341                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2342                         TH_LOG("Failed final read() from parent");
2343                 }
2344                 EXPECT_EQ('!', buf) {
2345                         TH_LOG("Failed to get final data from read()");
2346                 }
2347 
2348                 /* Directly report the status of our test harness results. */
2349                 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2350                                                      : EXIT_FAILURE);
2351         }
2352         EXPECT_EQ(0, close(pipefd[0]));
2353 
2354         /* Attach to child, setup options, and release. */
2355         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2356         ASSERT_EQ(true, WIFSTOPPED(status));
2357         ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2358                             PTRACE_O_TRACESECCOMP));
2359         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2360         ASSERT_EQ(1, write(pipefd[1], ".", 1));
2361 
2362         /* Wait for nanosleep() to start. */
2363         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2364         ASSERT_EQ(true, WIFSTOPPED(status));
2365         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2366         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2367         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2368         ASSERT_EQ(0x100, msg);
2369         EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2370 
2371         /* Might as well check siginfo for sanity while we're here. */
2372         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2373         ASSERT_EQ(SIGTRAP, info.si_signo);
2374         ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2375         EXPECT_EQ(0, info.si_errno);
2376         EXPECT_EQ(getuid(), info.si_uid);
2377         /* Verify signal delivery came from child (seccomp-triggered). */
2378         EXPECT_EQ(child_pid, info.si_pid);
2379 
2380         /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2381         ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2382         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2383         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2384         ASSERT_EQ(true, WIFSTOPPED(status));
2385         ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2386         /* Verify signal delivery came from parent now. */
2387         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2388         EXPECT_EQ(getpid(), info.si_pid);
2389 
2390         /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2391         ASSERT_EQ(0, kill(child_pid, SIGCONT));
2392         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2393         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2394         ASSERT_EQ(true, WIFSTOPPED(status));
2395         ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2396         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2397 
2398         /* Wait for restart_syscall() to start. */
2399         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2400         ASSERT_EQ(true, WIFSTOPPED(status));
2401         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2402         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2403         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2404 
2405         ASSERT_EQ(0x200, msg);
2406         ret = get_syscall(_metadata, child_pid);
2407 #if defined(__arm__)
2408         /*
2409          * FIXME:
2410          * - native ARM registers do NOT expose true syscall.
2411          * - compat ARM registers on ARM64 DO expose true syscall.
2412          */
2413         ASSERT_EQ(0, uname(&utsbuf));
2414         if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2415                 EXPECT_EQ(__NR_nanosleep, ret);
2416         } else
2417 #endif
2418         {
2419                 EXPECT_EQ(__NR_restart_syscall, ret);
2420         }
2421 
2422         /* Write again to end test. */
2423         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2424         ASSERT_EQ(1, write(pipefd[1], "!", 1));
2425         EXPECT_EQ(0, close(pipefd[1]));
2426 
2427         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2428         if (WIFSIGNALED(status) || WEXITSTATUS(status))
2429                 _metadata->passed = 0;
2430 }
2431 
2432 /*
2433  * TODO:
2434  * - add microbenchmarks
2435  * - expand NNP testing
2436  * - better arch-specific TRACE and TRAP handlers.
2437  * - endianness checking when appropriate
2438  * - 64-bit arg prodding
2439  * - arch value testing (x86 modes especially)
2440  * - ...
2441  */
2442 
2443 TEST_HARNESS_MAIN
2444 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp