]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - tools/testing/selftests/seccomp/seccomp_bpf.c
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
[karo-tx-linux.git] / tools / testing / selftests / seccomp / seccomp_bpf.c
1 /*
2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by the GPLv2 license.
4  *
5  * Test code for seccomp bpf.
6  */
7
8 #include <asm/siginfo.h>
9 #define __have_siginfo_t 1
10 #define __have_sigval_t 1
11 #define __have_sigevent_t 1
12
13 #include <errno.h>
14 #include <linux/filter.h>
15 #include <sys/prctl.h>
16 #include <sys/ptrace.h>
17 #include <sys/types.h>
18 #include <sys/user.h>
19 #include <linux/prctl.h>
20 #include <linux/ptrace.h>
21 #include <linux/seccomp.h>
22 #include <poll.h>
23 #include <pthread.h>
24 #include <semaphore.h>
25 #include <signal.h>
26 #include <stddef.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <linux/elf.h>
30 #include <sys/uio.h>
31
32 #define _GNU_SOURCE
33 #include <unistd.h>
34 #include <sys/syscall.h>
35
36 #include "test_harness.h"
37
/*
 * Fallback definitions for prctl()/seccomp constants.  Each value is
 * supplied only when the headers included above did not already
 * define the corresponding name.
 */

/* 0x59616d61 spells "Yama" in ASCII. */
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

/* The set/get pair is keyed on the "set" name only. */
#ifndef PR_SET_NO_NEW_PRIVS
# define PR_SET_NO_NEW_PRIVS 38
# define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
# define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
# define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
# define SECCOMP_EXT_ACT_TSYNC 1
#endif

/* Seccomp operating modes passed as the second prctl() argument. */
#ifndef SECCOMP_MODE_STRICT
# define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
# define SECCOMP_MODE_FILTER 2
#endif
66
/*
 * Fallback copy of the seccomp filter ABI, used only when the included
 * headers did not define SECCOMP_RET_KILL.
 */
#ifndef SECCOMP_RET_KILL

/* Filter return actions. */
# define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
# define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
# define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
# define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */

/* Masks splitting a filter return value into its action and data parts. */
# define SECCOMP_RET_ACTION      0x7fff0000U
# define SECCOMP_RET_DATA        0x0000ffffU

/* The record a filter program loads its inputs from. */
struct seccomp_data {
        int nr;                         /* system call number */
        __u32 arch;
        __u64 instruction_pointer;
        __u64 args[6];                  /* up to six syscall arguments */
};

#endif /* SECCOMP_RET_KILL */
85
/*
 * syscall_arg(_n): byte offset, within struct seccomp_data, of the
 * 32-bit word of args[_n] that holds its low half.  On big-endian
 * targets that word sits one __u32 past the start of the 64-bit slot.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
# define syscall_arg(_n) \
        (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
# error "wut? Unknown __BYTE_ORDER?!"
#endif

/*
 * Sentinel status values; presumably returned by sibling threads in
 * tests later in this file (not visible here).
 */
#define SIBLING_EXIT_UNKILLED   0xbadbeef
#define SIBLING_EXIT_FAILURE    0xbadface
#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
97
98 TEST(mode_strict_support)
99 {
100         long ret;
101
102         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
103         ASSERT_EQ(0, ret) {
104                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
105         }
106         syscall(__NR_exit, 1);
107 }
108
109 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
110 {
111         long ret;
112
113         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
114         ASSERT_EQ(0, ret) {
115                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
116         }
117         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
118                 NULL, NULL, NULL);
119         EXPECT_FALSE(true) {
120                 TH_LOG("Unreachable!");
121         }
122 }
123
124 /* Note! This doesn't test no new privs behavior */
125 TEST(no_new_privs_support)
126 {
127         long ret;
128
129         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
130         EXPECT_EQ(0, ret) {
131                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
132         }
133 }
134
135 /* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
136 TEST(mode_filter_support)
137 {
138         long ret;
139
140         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
141         ASSERT_EQ(0, ret) {
142                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
143         }
144         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
145         EXPECT_EQ(-1, ret);
146         EXPECT_EQ(EFAULT, errno) {
147                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
148         }
149 }
150
151 TEST(mode_filter_without_nnp)
152 {
153         struct sock_filter filter[] = {
154                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
155         };
156         struct sock_fprog prog = {
157                 .len = (unsigned short)ARRAY_SIZE(filter),
158                 .filter = filter,
159         };
160         long ret;
161
162         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
163         ASSERT_LE(0, ret) {
164                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
165         }
166         errno = 0;
167         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
168         /* Succeeds with CAP_SYS_ADMIN, fails without */
169         /* TODO(wad) check caps not euid */
170         if (geteuid()) {
171                 EXPECT_EQ(-1, ret);
172                 EXPECT_EQ(EACCES, errno);
173         } else {
174                 EXPECT_EQ(0, ret);
175         }
176 }
177
178 #define MAX_INSNS_PER_PATH 32768
179
180 TEST(filter_size_limits)
181 {
182         int i;
183         int count = BPF_MAXINSNS + 1;
184         struct sock_filter allow[] = {
185                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
186         };
187         struct sock_filter *filter;
188         struct sock_fprog prog = { };
189         long ret;
190
191         filter = calloc(count, sizeof(*filter));
192         ASSERT_NE(NULL, filter);
193
194         for (i = 0; i < count; i++)
195                 filter[i] = allow[0];
196
197         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
198         ASSERT_EQ(0, ret);
199
200         prog.filter = filter;
201         prog.len = count;
202
203         /* Too many filter instructions in a single filter. */
204         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
205         ASSERT_NE(0, ret) {
206                 TH_LOG("Installing %d insn filter was allowed", prog.len);
207         }
208
209         /* One less is okay, though. */
210         prog.len -= 1;
211         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
212         ASSERT_EQ(0, ret) {
213                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
214         }
215 }
216
217 TEST(filter_chain_limits)
218 {
219         int i;
220         int count = BPF_MAXINSNS;
221         struct sock_filter allow[] = {
222                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
223         };
224         struct sock_filter *filter;
225         struct sock_fprog prog = { };
226         long ret;
227
228         filter = calloc(count, sizeof(*filter));
229         ASSERT_NE(NULL, filter);
230
231         for (i = 0; i < count; i++)
232                 filter[i] = allow[0];
233
234         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
235         ASSERT_EQ(0, ret);
236
237         prog.filter = filter;
238         prog.len = 1;
239
240         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
241         ASSERT_EQ(0, ret);
242
243         prog.len = count;
244
245         /* Too many total filter instructions. */
246         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
247                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
248                 if (ret != 0)
249                         break;
250         }
251         ASSERT_NE(0, ret) {
252                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
253                        i, count, i * (count + 4));
254         }
255 }
256
/*
 * Once a filter is installed, asking for SECCOMP_MODE_STRICT is
 * expected to fail with EINVAL.
 */
TEST(mode_filter_cannot_move_to_strict)
{
        struct sock_filter insns[] = {
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog fprog = {
                .filter = insns,
                .len = (unsigned short)ARRAY_SIZE(insns),
        };
        long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

        ASSERT_EQ(0, ret);

        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
        ASSERT_EQ(0, ret);

        /* Now try to switch modes. */
        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
        EXPECT_EQ(-1, ret);
        EXPECT_EQ(EINVAL, errno);
}
278
279
/*
 * PR_GET_SECCOMP is expected to report 0 before a filter is installed
 * and 2 afterwards.
 */
TEST(mode_filter_get_seccomp)
{
        struct sock_filter insns[] = {
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog fprog = {
                .filter = insns,
                .len = (unsigned short)ARRAY_SIZE(insns),
        };
        long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

        ASSERT_EQ(0, ret);

        /* Before: no seccomp mode active. */
        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
        EXPECT_EQ(0, ret);

        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
        ASSERT_EQ(0, ret);

        /* After: the reported mode is 2. */
        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
        EXPECT_EQ(2, ret);
}
303
304
/* A one-instruction "return ALLOW" filter must install cleanly. */
TEST(ALLOW_all)
{
        struct sock_filter insns[] = {
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog fprog = {
                .filter = insns,
                .len = (unsigned short)ARRAY_SIZE(insns),
        };
        long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

        ASSERT_EQ(0, ret);

        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
        ASSERT_EQ(0, ret);
}
322
/* A filter program with zero instructions must be refused with EINVAL. */
TEST(empty_prog)
{
        /* Deliberately empty instruction array. */
        struct sock_filter insns[] = {
        };
        struct sock_fprog fprog = {
                .filter = insns,
                .len = (unsigned short)ARRAY_SIZE(insns),
        };
        long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

        ASSERT_EQ(0, ret);

        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
        EXPECT_EQ(-1, ret);
        EXPECT_EQ(EINVAL, errno);
}
340
341 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
342 {
343         struct sock_filter filter[] = {
344                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
345         };
346         struct sock_fprog prog = {
347                 .len = (unsigned short)ARRAY_SIZE(filter),
348                 .filter = filter,
349         };
350         long ret;
351
352         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
353         ASSERT_EQ(0, ret);
354
355         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
356         ASSERT_EQ(0, ret);
357         EXPECT_EQ(0, syscall(__NR_getpid)) {
358                 TH_LOG("getpid() shouldn't ever return");
359         }
360 }
361
362 /* return code >= 0x80000000 is unused. */
363 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
364 {
365         struct sock_filter filter[] = {
366                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
367         };
368         struct sock_fprog prog = {
369                 .len = (unsigned short)ARRAY_SIZE(filter),
370                 .filter = filter,
371         };
372         long ret;
373
374         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
375         ASSERT_EQ(0, ret);
376
377         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
378         ASSERT_EQ(0, ret);
379         EXPECT_EQ(0, syscall(__NR_getpid)) {
380                 TH_LOG("getpid() shouldn't ever return");
381         }
382 }
383
384 TEST_SIGNAL(KILL_all, SIGSYS)
385 {
386         struct sock_filter filter[] = {
387                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
388         };
389         struct sock_fprog prog = {
390                 .len = (unsigned short)ARRAY_SIZE(filter),
391                 .filter = filter,
392         };
393         long ret;
394
395         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
396         ASSERT_EQ(0, ret);
397
398         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
399         ASSERT_EQ(0, ret);
400 }
401
402 TEST_SIGNAL(KILL_one, SIGSYS)
403 {
404         struct sock_filter filter[] = {
405                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
406                         offsetof(struct seccomp_data, nr)),
407                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
408                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
409                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
410         };
411         struct sock_fprog prog = {
412                 .len = (unsigned short)ARRAY_SIZE(filter),
413                 .filter = filter,
414         };
415         long ret;
416         pid_t parent = getppid();
417
418         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
419         ASSERT_EQ(0, ret);
420
421         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
422         ASSERT_EQ(0, ret);
423
424         EXPECT_EQ(parent, syscall(__NR_getppid));
425         /* getpid() should never return. */
426         EXPECT_EQ(0, syscall(__NR_getpid));
427 }
428
429 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
430 {
431         struct sock_filter filter[] = {
432                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
433                         offsetof(struct seccomp_data, nr)),
434                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
435                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
436                 /* Only both with lower 32-bit for now. */
437                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
438                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
439                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
440                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
441         };
442         struct sock_fprog prog = {
443                 .len = (unsigned short)ARRAY_SIZE(filter),
444                 .filter = filter,
445         };
446         long ret;
447         pid_t parent = getppid();
448         pid_t pid = getpid();
449
450         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
451         ASSERT_EQ(0, ret);
452
453         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
454         ASSERT_EQ(0, ret);
455
456         EXPECT_EQ(parent, syscall(__NR_getppid));
457         EXPECT_EQ(pid, syscall(__NR_getpid));
458         /* getpid() should never return. */
459         EXPECT_EQ(0, syscall(__NR_getpid, 0x0C0FFEE));
460 }
461
462 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
463 {
464         struct sock_filter filter[] = {
465                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
466                         offsetof(struct seccomp_data, nr)),
467                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
468                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
469                 /* Only both with lower 32-bit for now. */
470                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
471                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
472                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
473                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
474         };
475         struct sock_fprog prog = {
476                 .len = (unsigned short)ARRAY_SIZE(filter),
477                 .filter = filter,
478         };
479         long ret;
480         pid_t parent = getppid();
481         pid_t pid = getpid();
482
483         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
484         ASSERT_EQ(0, ret);
485
486         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
487         ASSERT_EQ(0, ret);
488
489         EXPECT_EQ(parent, syscall(__NR_getppid));
490         EXPECT_EQ(pid, syscall(__NR_getpid));
491         /* getpid() should never return. */
492         EXPECT_EQ(0, syscall(__NR_getpid, 1, 2, 3, 4, 5, 0x0C0FFEE));
493 }
494
/* TODO(wad) add 64-bit versus 32-bit arg tests. */

/*
 * A load from syscall_arg(6) addresses a slot past the six-entry
 * args[] array; installing such a filter is expected to fail with
 * EINVAL.
 */
TEST(arg_out_of_range)
{
        struct sock_filter insns[] = {
                BPF_STMT(BPF_LD | BPF_W | BPF_ABS, syscall_arg(6)),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog fprog = {
                .filter = insns,
                .len = (unsigned short)ARRAY_SIZE(insns),
        };
        long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

        ASSERT_EQ(0, ret);

        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
        EXPECT_EQ(-1, ret);
        EXPECT_EQ(EINVAL, errno);
}
515
/*
 * The filter answers read with SECCOMP_RET_ERRNO | E2BIG; read() is
 * then expected to fail with errno == E2BIG.
 */
TEST(ERRNO_valid)
{
        struct sock_filter insns[] = {
                /* A = syscall number */
                BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                         offsetof(struct seccomp_data, nr)),
                /* read falls through to ERRNO, anything else skips to ALLOW */
                BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 0, 1),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | E2BIG),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog fprog = {
                .filter = insns,
                .len = (unsigned short)ARRAY_SIZE(insns),
        };
        pid_t parent = getppid();
        long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

        ASSERT_EQ(0, ret);

        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
        ASSERT_EQ(0, ret);

        EXPECT_EQ(parent, syscall(__NR_getppid));
        EXPECT_EQ(-1, read(0, NULL, 0));
        EXPECT_EQ(E2BIG, errno);
}
542
/*
 * The filter answers read with SECCOMP_RET_ERRNO and a data value of
 * 0; read() is then expected to return 0.
 */
TEST(ERRNO_zero)
{
        struct sock_filter insns[] = {
                /* A = syscall number */
                BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                         offsetof(struct seccomp_data, nr)),
                /* read falls through to ERRNO, anything else skips to ALLOW */
                BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 0, 1),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 0),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog fprog = {
                .filter = insns,
                .len = (unsigned short)ARRAY_SIZE(insns),
        };
        pid_t parent = getppid();
        long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

        ASSERT_EQ(0, ret);

        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
        ASSERT_EQ(0, ret);

        EXPECT_EQ(parent, syscall(__NR_getppid));
        /* "errno" of 0 is ok. */
        EXPECT_EQ(0, read(0, NULL, 0));
}
569
/*
 * The filter answers read with SECCOMP_RET_ERRNO and a data value of
 * 4096; the errno seen by the caller is expected to be 4095.
 */
TEST(ERRNO_capped)
{
        struct sock_filter insns[] = {
                /* A = syscall number */
                BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                         offsetof(struct seccomp_data, nr)),
                /* read falls through to ERRNO, anything else skips to ALLOW */
                BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 0, 1),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 4096),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog fprog = {
                .filter = insns,
                .len = (unsigned short)ARRAY_SIZE(insns),
        };
        pid_t parent = getppid();
        long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

        ASSERT_EQ(0, ret);

        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
        ASSERT_EQ(0, ret);

        EXPECT_EQ(parent, syscall(__NR_getppid));
        EXPECT_EQ(-1, read(0, NULL, 0));
        EXPECT_EQ(4095, errno);
}
596
597 FIXTURE_DATA(TRAP) {
598         struct sock_fprog prog;
599 };
600
601 FIXTURE_SETUP(TRAP)
602 {
603         struct sock_filter filter[] = {
604                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
605                         offsetof(struct seccomp_data, nr)),
606                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
607                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
608                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
609         };
610
611         memset(&self->prog, 0, sizeof(self->prog));
612         self->prog.filter = malloc(sizeof(filter));
613         ASSERT_NE(NULL, self->prog.filter);
614         memcpy(self->prog.filter, filter, sizeof(filter));
615         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
616 }
617
618 FIXTURE_TEARDOWN(TRAP)
619 {
620         if (self->prog.filter)
621                 free(self->prog.filter);
622 }
623
624 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
625 {
626         long ret;
627
628         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
629         ASSERT_EQ(0, ret);
630
631         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
632         ASSERT_EQ(0, ret);
633         syscall(__NR_getpid);
634 }
635
636 /* Ensure that SIGSYS overrides SIG_IGN */
637 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
638 {
639         long ret;
640
641         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
642         ASSERT_EQ(0, ret);
643
644         signal(SIGSYS, SIG_IGN);
645
646         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
647         ASSERT_EQ(0, ret);
648         syscall(__NR_getpid);
649 }
650
651 static struct siginfo TRAP_info;
652 static volatile int TRAP_nr;
653 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
654 {
655         memcpy(&TRAP_info, info, sizeof(TRAP_info));
656         TRAP_nr = nr;
657 }
658
659 TEST_F(TRAP, handler)
660 {
661         int ret, test;
662         struct sigaction act;
663         sigset_t mask;
664
665         memset(&act, 0, sizeof(act));
666         sigemptyset(&mask);
667         sigaddset(&mask, SIGSYS);
668
669         act.sa_sigaction = &TRAP_action;
670         act.sa_flags = SA_SIGINFO;
671         ret = sigaction(SIGSYS, &act, NULL);
672         ASSERT_EQ(0, ret) {
673                 TH_LOG("sigaction failed");
674         }
675         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
676         ASSERT_EQ(0, ret) {
677                 TH_LOG("sigprocmask failed");
678         }
679
680         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
681         ASSERT_EQ(0, ret);
682         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
683         ASSERT_EQ(0, ret);
684         TRAP_nr = 0;
685         memset(&TRAP_info, 0, sizeof(TRAP_info));
686         /* Expect the registers to be rolled back. (nr = error) may vary
687          * based on arch. */
688         ret = syscall(__NR_getpid);
689         /* Silence gcc warning about volatile. */
690         test = TRAP_nr;
691         EXPECT_EQ(SIGSYS, test);
692         struct local_sigsys {
693                 void *_call_addr;       /* calling user insn */
694                 int _syscall;           /* triggering system call number */
695                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
696         } *sigsys = (struct local_sigsys *)
697 #ifdef si_syscall
698                 &(TRAP_info.si_call_addr);
699 #else
700                 &TRAP_info.si_pid;
701 #endif
702         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
703         /* Make sure arch is non-zero. */
704         EXPECT_NE(0, sigsys->_arch);
705         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
706 }
707
708 FIXTURE_DATA(precedence) {
709         struct sock_fprog allow;
710         struct sock_fprog trace;
711         struct sock_fprog error;
712         struct sock_fprog trap;
713         struct sock_fprog kill;
714 };
715
716 FIXTURE_SETUP(precedence)
717 {
718         struct sock_filter allow_insns[] = {
719                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
720         };
721         struct sock_filter trace_insns[] = {
722                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
723                         offsetof(struct seccomp_data, nr)),
724                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
725                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
726                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
727         };
728         struct sock_filter error_insns[] = {
729                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
730                         offsetof(struct seccomp_data, nr)),
731                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
732                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
733                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
734         };
735         struct sock_filter trap_insns[] = {
736                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
737                         offsetof(struct seccomp_data, nr)),
738                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
739                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
740                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
741         };
742         struct sock_filter kill_insns[] = {
743                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
744                         offsetof(struct seccomp_data, nr)),
745                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
746                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
747                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
748         };
749
750         memset(self, 0, sizeof(*self));
751 #define FILTER_ALLOC(_x) \
752         self->_x.filter = malloc(sizeof(_x##_insns)); \
753         ASSERT_NE(NULL, self->_x.filter); \
754         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
755         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
756         FILTER_ALLOC(allow);
757         FILTER_ALLOC(trace);
758         FILTER_ALLOC(error);
759         FILTER_ALLOC(trap);
760         FILTER_ALLOC(kill);
761 }
762
763 FIXTURE_TEARDOWN(precedence)
764 {
765 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
766         FILTER_FREE(allow);
767         FILTER_FREE(trace);
768         FILTER_FREE(error);
769         FILTER_FREE(trap);
770         FILTER_FREE(kill);
771 }
772
773 TEST_F(precedence, allow_ok)
774 {
775         pid_t parent, res = 0;
776         long ret;
777
778         parent = getppid();
779         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
780         ASSERT_EQ(0, ret);
781
782         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
783         ASSERT_EQ(0, ret);
784         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
785         ASSERT_EQ(0, ret);
786         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
787         ASSERT_EQ(0, ret);
788         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
789         ASSERT_EQ(0, ret);
790         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
791         ASSERT_EQ(0, ret);
792         /* Should work just fine. */
793         res = syscall(__NR_getppid);
794         EXPECT_EQ(parent, res);
795 }
796
797 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
798 {
799         pid_t parent, res = 0;
800         long ret;
801
802         parent = getppid();
803         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
804         ASSERT_EQ(0, ret);
805
806         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
807         ASSERT_EQ(0, ret);
808         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
809         ASSERT_EQ(0, ret);
810         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
811         ASSERT_EQ(0, ret);
812         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
813         ASSERT_EQ(0, ret);
814         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
815         ASSERT_EQ(0, ret);
816         /* Should work just fine. */
817         res = syscall(__NR_getppid);
818         EXPECT_EQ(parent, res);
819         /* getpid() should never return. */
820         res = syscall(__NR_getpid);
821         EXPECT_EQ(0, res);
822 }
823
824 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
825 {
826         pid_t parent;
827         long ret;
828
829         parent = getppid();
830         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
831         ASSERT_EQ(0, ret);
832
833         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
834         ASSERT_EQ(0, ret);
835         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
836         ASSERT_EQ(0, ret);
837         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
838         ASSERT_EQ(0, ret);
839         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
840         ASSERT_EQ(0, ret);
841         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
842         ASSERT_EQ(0, ret);
843         /* Should work just fine. */
844         EXPECT_EQ(parent, syscall(__NR_getppid));
845         /* getpid() should never return. */
846         EXPECT_EQ(0, syscall(__NR_getpid));
847 }
848
849 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
850 {
851         pid_t parent;
852         long ret;
853
854         parent = getppid();
855         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
856         ASSERT_EQ(0, ret);
857
858         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
859         ASSERT_EQ(0, ret);
860         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
861         ASSERT_EQ(0, ret);
862         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
863         ASSERT_EQ(0, ret);
864         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
865         ASSERT_EQ(0, ret);
866         /* Should work just fine. */
867         EXPECT_EQ(parent, syscall(__NR_getppid));
868         /* getpid() should never return. */
869         EXPECT_EQ(0, syscall(__NR_getpid));
870 }
871
872 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
873 {
874         pid_t parent;
875         long ret;
876
877         parent = getppid();
878         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
879         ASSERT_EQ(0, ret);
880
881         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
882         ASSERT_EQ(0, ret);
883         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
884         ASSERT_EQ(0, ret);
885         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
886         ASSERT_EQ(0, ret);
887         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
888         ASSERT_EQ(0, ret);
889         /* Should work just fine. */
890         EXPECT_EQ(parent, syscall(__NR_getppid));
891         /* getpid() should never return. */
892         EXPECT_EQ(0, syscall(__NR_getpid));
893 }
894
895 TEST_F(precedence, errno_is_third)
896 {
897         pid_t parent;
898         long ret;
899
900         parent = getppid();
901         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
902         ASSERT_EQ(0, ret);
903
904         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
905         ASSERT_EQ(0, ret);
906         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
907         ASSERT_EQ(0, ret);
908         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
909         ASSERT_EQ(0, ret);
910         /* Should work just fine. */
911         EXPECT_EQ(parent, syscall(__NR_getppid));
912         EXPECT_EQ(0, syscall(__NR_getpid));
913 }
914
915 TEST_F(precedence, errno_is_third_in_any_order)
916 {
917         pid_t parent;
918         long ret;
919
920         parent = getppid();
921         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
922         ASSERT_EQ(0, ret);
923
924         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
925         ASSERT_EQ(0, ret);
926         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
927         ASSERT_EQ(0, ret);
928         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
929         ASSERT_EQ(0, ret);
930         /* Should work just fine. */
931         EXPECT_EQ(parent, syscall(__NR_getppid));
932         EXPECT_EQ(0, syscall(__NR_getpid));
933 }
934
935 TEST_F(precedence, trace_is_fourth)
936 {
937         pid_t parent;
938         long ret;
939
940         parent = getppid();
941         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
942         ASSERT_EQ(0, ret);
943
944         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
945         ASSERT_EQ(0, ret);
946         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
947         ASSERT_EQ(0, ret);
948         /* Should work just fine. */
949         EXPECT_EQ(parent, syscall(__NR_getppid));
950         /* No ptracer */
951         EXPECT_EQ(-1, syscall(__NR_getpid));
952 }
953
954 TEST_F(precedence, trace_is_fourth_in_any_order)
955 {
956         pid_t parent;
957         long ret;
958
959         parent = getppid();
960         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
961         ASSERT_EQ(0, ret);
962
963         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
964         ASSERT_EQ(0, ret);
965         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
966         ASSERT_EQ(0, ret);
967         /* Should work just fine. */
968         EXPECT_EQ(parent, syscall(__NR_getppid));
969         /* No ptracer */
970         EXPECT_EQ(-1, syscall(__NR_getpid));
971 }
972
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits above the low 16 of a wait() status equal
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that a compound
 * expression (e.g. "a | b") is shifted as a whole rather than only its
 * last operand.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Loop flag for tracer(). It is cleared asynchronously from a signal
 * handler, so it is declared volatile sig_atomic_t: that is the object
 * type a handler may portably write while the interrupted code reads it.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler (installed for SIGUSR1 by tracer()): request shutdown. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = 0;
}
992
/*
 * Callback type invoked by tracer() for each seccomp ptrace event seen on
 * the tracee. It receives the harness metadata, the tracee pid, the raw
 * wait() status and the opaque args pointer that was handed to tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata,
			   pid_t tracee, int status, void *args);
995
996 void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
997             tracer_func_t tracer_func, void *args)
998 {
999         int ret = -1;
1000         struct sigaction action = {
1001                 .sa_handler = tracer_stop,
1002         };
1003
1004         /* Allow external shutdown. */
1005         tracer_running = true;
1006         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1007
1008         errno = 0;
1009         while (ret == -1 && errno != EINVAL)
1010                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1011         ASSERT_EQ(0, ret) {
1012                 kill(tracee, SIGKILL);
1013         }
1014         /* Wait for attach stop */
1015         wait(NULL);
1016
1017         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
1018         ASSERT_EQ(0, ret) {
1019                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1020                 kill(tracee, SIGKILL);
1021         }
1022         ptrace(PTRACE_CONT, tracee, NULL, 0);
1023
1024         /* Unblock the tracee */
1025         ASSERT_EQ(1, write(fd, "A", 1));
1026         ASSERT_EQ(0, close(fd));
1027
1028         /* Run until we're shut down. Must assert to stop execution. */
1029         while (tracer_running) {
1030                 int status;
1031
1032                 if (wait(&status) != tracee)
1033                         continue;
1034                 if (WIFSIGNALED(status) || WIFEXITED(status))
1035                         /* Child is dead. Time to go. */
1036                         return;
1037
1038                 /* Make sure this is a seccomp event. */
1039                 ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
1040
1041                 tracer_func(_metadata, tracee, status, args);
1042
1043                 ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
1044                 ASSERT_EQ(0, ret);
1045         }
1046         /* Directly report the status of our test harness results. */
1047         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1048 }
1049
/* Common tracer setup/teardown functions. */

/* Signal handler that deliberately does nothing. */
void cont_handler(int num)
{
	(void)num;
}
1053 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1054                           tracer_func_t func, void *args)
1055 {
1056         char sync;
1057         int pipefd[2];
1058         pid_t tracer_pid;
1059         pid_t tracee = getpid();
1060
1061         /* Setup a pipe for clean synchronization. */
1062         ASSERT_EQ(0, pipe(pipefd));
1063
1064         /* Fork a child which we'll promote to tracer */
1065         tracer_pid = fork();
1066         ASSERT_LE(0, tracer_pid);
1067         signal(SIGALRM, cont_handler);
1068         if (tracer_pid == 0) {
1069                 close(pipefd[0]);
1070                 tracer(_metadata, pipefd[1], tracee, func, args);
1071                 syscall(__NR_exit, 0);
1072         }
1073         close(pipefd[1]);
1074         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1075         read(pipefd[0], &sync, 1);
1076         close(pipefd[0]);
1077
1078         return tracer_pid;
1079 }
1080 void teardown_trace_fixture(struct __test_metadata *_metadata,
1081                             pid_t tracer)
1082 {
1083         if (tracer) {
1084                 int status;
1085                 /*
1086                  * Extract the exit code from the other process and
1087                  * adopt it for ourselves in case its asserts failed.
1088                  */
1089                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1090                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1091                 if (WEXITSTATUS(status))
1092                         _metadata->passed = 0;
1093         }
1094 }
1095
/* "poke" tracer arguments and function. */
struct tracer_args_poke_t {
	/* Address in the tracee that tracer_poke() writes to. */
	unsigned long poke_addr;
};
1100
1101 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1102                  void *args)
1103 {
1104         int ret;
1105         unsigned long msg;
1106         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1107
1108         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1109         EXPECT_EQ(0, ret);
1110         /* If this fails, don't try to recover. */
1111         ASSERT_EQ(0x1001, msg) {
1112                 kill(tracee, SIGKILL);
1113         }
1114         /*
1115          * Poke in the message.
1116          * Registers are not touched to try to keep this relatively arch
1117          * agnostic.
1118          */
1119         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1120         EXPECT_EQ(0, ret);
1121 }
1122
/* Per-test state for the TRACE_poke fixture. */
FIXTURE_DATA(TRACE_poke) {
	/* Heap copy of the filter program built in FIXTURE_SETUP. */
	struct sock_fprog prog;
	/* Pid returned by setup_trace_fixture(). */
	pid_t tracer;
	/* Word whose address is handed to the tracer as poke target. */
	long poked;
	/* Arguments passed to tracer_poke() via setup_trace_fixture(). */
	struct tracer_args_poke_t tracer_args;
};
1129
1130 FIXTURE_SETUP(TRACE_poke)
1131 {
1132         struct sock_filter filter[] = {
1133                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1134                         offsetof(struct seccomp_data, nr)),
1135                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1136                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1137                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1138         };
1139
1140         self->poked = 0;
1141         memset(&self->prog, 0, sizeof(self->prog));
1142         self->prog.filter = malloc(sizeof(filter));
1143         ASSERT_NE(NULL, self->prog.filter);
1144         memcpy(self->prog.filter, filter, sizeof(filter));
1145         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1146
1147         /* Set up tracer args. */
1148         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1149
1150         /* Launch tracer. */
1151         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1152                                            &self->tracer_args);
1153 }
1154
1155 FIXTURE_TEARDOWN(TRACE_poke)
1156 {
1157         teardown_trace_fixture(_metadata, self->tracer);
1158         if (self->prog.filter)
1159                 free(self->prog.filter);
1160 }
1161
1162 TEST_F(TRACE_poke, read_has_side_effects)
1163 {
1164         ssize_t ret;
1165
1166         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1167         ASSERT_EQ(0, ret);
1168
1169         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1170         ASSERT_EQ(0, ret);
1171
1172         EXPECT_EQ(0, self->poked);
1173         ret = read(-1, NULL, 0);
1174         EXPECT_EQ(-1, ret);
1175         EXPECT_EQ(0x1001, self->poked);
1176 }
1177
1178 TEST_F(TRACE_poke, getpid_runs_normally)
1179 {
1180         long ret;
1181
1182         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1183         ASSERT_EQ(0, ret);
1184
1185         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1186         ASSERT_EQ(0, ret);
1187
1188         EXPECT_EQ(0, self->poked);
1189         EXPECT_NE(0, syscall(__NR_getpid));
1190         EXPECT_EQ(0, self->poked);
1191 }
1192
/*
 * Per-architecture names used by get_syscall() and change_syscall():
 *   ARCH_REGS   - register structure fetched with PTRACE_GETREGSET
 *   SYSCALL_NUM - member of ARCH_REGS holding the syscall number
 *   SYSCALL_RET - member of ARCH_REGS holding the syscall return value
 * Unknown architectures fail the build.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1216
1217 /* Architecture-specific syscall fetching routine. */
1218 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1219 {
1220         struct iovec iov;
1221         ARCH_REGS regs;
1222
1223         iov.iov_base = &regs;
1224         iov.iov_len = sizeof(regs);
1225         EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1226                 TH_LOG("PTRACE_GETREGSET failed");
1227                 return -1;
1228         }
1229
1230         return regs.SYSCALL_NUM;
1231 }
1232
1233 /* Architecture-specific syscall changing routine. */
1234 void change_syscall(struct __test_metadata *_metadata,
1235                     pid_t tracee, int syscall)
1236 {
1237         struct iovec iov;
1238         int ret;
1239         ARCH_REGS regs;
1240
1241         iov.iov_base = &regs;
1242         iov.iov_len = sizeof(regs);
1243         ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1244         EXPECT_EQ(0, ret);
1245
1246 #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || defined(__powerpc__)
1247         {
1248                 regs.SYSCALL_NUM = syscall;
1249         }
1250
1251 #elif defined(__arm__)
1252 # ifndef PTRACE_SET_SYSCALL
1253 #  define PTRACE_SET_SYSCALL   23
1254 # endif
1255         {
1256                 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1257                 EXPECT_EQ(0, ret);
1258         }
1259
1260 #else
1261         ASSERT_EQ(1, 0) {
1262                 TH_LOG("How is the syscall changed on this architecture?");
1263         }
1264 #endif
1265
1266         /* If syscall is skipped, change return value. */
1267         if (syscall == -1)
1268                 regs.SYSCALL_RET = 1;
1269
1270         ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1271         EXPECT_EQ(0, ret);
1272 }
1273
1274 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1275                     int status, void *args)
1276 {
1277         int ret;
1278         unsigned long msg;
1279
1280         /* Make sure we got the right message. */
1281         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1282         EXPECT_EQ(0, ret);
1283
1284         switch (msg) {
1285         case 0x1002:
1286                 /* change getpid to getppid. */
1287                 change_syscall(_metadata, tracee, __NR_getppid);
1288                 break;
1289         case 0x1003:
1290                 /* skip gettid. */
1291                 change_syscall(_metadata, tracee, -1);
1292                 break;
1293         case 0x1004:
1294                 /* do nothing (allow getppid) */
1295                 break;
1296         default:
1297                 EXPECT_EQ(0, msg) {
1298                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1299                         kill(tracee, SIGKILL);
1300                 }
1301         }
1302
1303 }
1304
/* Per-test state for the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
	/* Heap copy of the filter program built in FIXTURE_SETUP. */
	struct sock_fprog prog;
	/*
	 * tracer: pid returned by setup_trace_fixture();
	 * mytid, mypid, parent: gettid/getpid/getppid results recorded in
	 * FIXTURE_SETUP for later comparison.
	 */
	pid_t tracer, mytid, mypid, parent;
};
1309
1310 FIXTURE_SETUP(TRACE_syscall)
1311 {
1312         struct sock_filter filter[] = {
1313                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1314                         offsetof(struct seccomp_data, nr)),
1315                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1316                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1317                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1318                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1319                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1320                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1321                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1322         };
1323
1324         memset(&self->prog, 0, sizeof(self->prog));
1325         self->prog.filter = malloc(sizeof(filter));
1326         ASSERT_NE(NULL, self->prog.filter);
1327         memcpy(self->prog.filter, filter, sizeof(filter));
1328         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1329
1330         /* Prepare some testable syscall results. */
1331         self->mytid = syscall(__NR_gettid);
1332         ASSERT_GT(self->mytid, 0);
1333         ASSERT_NE(self->mytid, 1) {
1334                 TH_LOG("Running this test as init is not supported. :)");
1335         }
1336
1337         self->mypid = getpid();
1338         ASSERT_GT(self->mypid, 0);
1339         ASSERT_EQ(self->mytid, self->mypid);
1340
1341         self->parent = getppid();
1342         ASSERT_GT(self->parent, 0);
1343         ASSERT_NE(self->parent, self->mypid);
1344
1345         /* Launch tracer. */
1346         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
1347 }
1348
1349 FIXTURE_TEARDOWN(TRACE_syscall)
1350 {
1351         teardown_trace_fixture(_metadata, self->tracer);
1352         if (self->prog.filter)
1353                 free(self->prog.filter);
1354 }
1355
1356 TEST_F(TRACE_syscall, syscall_allowed)
1357 {
1358         long ret;
1359
1360         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1361         ASSERT_EQ(0, ret);
1362
1363         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1364         ASSERT_EQ(0, ret);
1365
1366         /* getppid works as expected (no changes). */
1367         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1368         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1369 }
1370
1371 TEST_F(TRACE_syscall, syscall_redirected)
1372 {
1373         long ret;
1374
1375         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1376         ASSERT_EQ(0, ret);
1377
1378         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1379         ASSERT_EQ(0, ret);
1380
1381         /* getpid has been redirected to getppid as expected. */
1382         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1383         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1384 }
1385
1386 TEST_F(TRACE_syscall, syscall_dropped)
1387 {
1388         long ret;
1389
1390         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1391         ASSERT_EQ(0, ret);
1392
1393         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1394         ASSERT_EQ(0, ret);
1395
1396         /* gettid has been skipped and an altered return value stored. */
1397         EXPECT_EQ(1, syscall(__NR_gettid));
1398         EXPECT_NE(self->mytid, syscall(__NR_gettid));
1399 }
1400
/*
 * Fallback syscall number for seccomp, used only when the system headers
 * do not define __NR_seccomp. Unknown architectures get a warning and a
 * placeholder value of 0xffff.
 */
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

/* Fallbacks for the operation codes passed to seccomp(). */
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

/* Fallback for the thread-sync flag passed to seccomp(). */
#ifndef SECCOMP_FLAG_FILTER_TSYNC
#define SECCOMP_FLAG_FILTER_TSYNC 1
#endif
1429
#ifndef seccomp
/*
 * Invoke the raw seccomp syscall with @op, @flags and @filter.
 * errno is cleared first, so callers can inspect errno afterwards without
 * seeing a stale value. Returns whatever syscall() returned.
 */
int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, filter);
	return rc;
}
#endif
1437
1438 TEST(seccomp_syscall)
1439 {
1440         struct sock_filter filter[] = {
1441                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1442         };
1443         struct sock_fprog prog = {
1444                 .len = (unsigned short)ARRAY_SIZE(filter),
1445                 .filter = filter,
1446         };
1447         long ret;
1448
1449         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1450         ASSERT_EQ(0, ret) {
1451                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1452         }
1453
1454         /* Reject insane operation. */
1455         ret = seccomp(-1, 0, &prog);
1456         EXPECT_EQ(EINVAL, errno) {
1457                 TH_LOG("Did not reject crazy op value!");
1458         }
1459
1460         /* Reject strict with flags or pointer. */
1461         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1462         EXPECT_EQ(EINVAL, errno) {
1463                 TH_LOG("Did not reject mode strict with flags!");
1464         }
1465         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1466         EXPECT_EQ(EINVAL, errno) {
1467                 TH_LOG("Did not reject mode strict with uargs!");
1468         }
1469
1470         /* Reject insane args for filter. */
1471         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1472         EXPECT_EQ(EINVAL, errno) {
1473                 TH_LOG("Did not reject crazy filter flags!");
1474         }
1475         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1476         EXPECT_EQ(EFAULT, errno) {
1477                 TH_LOG("Did not reject NULL filter!");
1478         }
1479
1480         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1481         EXPECT_EQ(0, errno) {
1482                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1483                         strerror(errno));
1484         }
1485 }
1486
1487 TEST(seccomp_syscall_mode_lock)
1488 {
1489         struct sock_filter filter[] = {
1490                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1491         };
1492         struct sock_fprog prog = {
1493                 .len = (unsigned short)ARRAY_SIZE(filter),
1494                 .filter = filter,
1495         };
1496         long ret;
1497
1498         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1499         ASSERT_EQ(0, ret) {
1500                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1501         }
1502
1503         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1504         EXPECT_EQ(0, ret) {
1505                 TH_LOG("Could not install filter!");
1506         }
1507
1508         /* Make sure neither entry point will switch to strict. */
1509         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1510         EXPECT_EQ(EINVAL, errno) {
1511                 TH_LOG("Switched to mode strict!");
1512         }
1513
1514         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1515         EXPECT_EQ(EINVAL, errno) {
1516                 TH_LOG("Switched to mode strict!");
1517         }
1518 }
1519
1520 TEST(TSYNC_first)
1521 {
1522         struct sock_filter filter[] = {
1523                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1524         };
1525         struct sock_fprog prog = {
1526                 .len = (unsigned short)ARRAY_SIZE(filter),
1527                 .filter = filter,
1528         };
1529         long ret;
1530
1531         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1532         ASSERT_EQ(0, ret) {
1533                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1534         }
1535
1536         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1537                       &prog);
1538         EXPECT_EQ(0, ret) {
1539                 TH_LOG("Could not install initial filter with TSYNC!");
1540         }
1541 }
1542
/* Number of sibling threads used by the TSYNC fixture. */
#define TSYNC_SIBLINGS 2

/* Per-thread state shared between a TSYNC test and one sibling thread. */
struct tsync_sibling {
	/* Thread handle filled in by pthread_create(). */
	pthread_t tid;
	/* gettid() value recorded by the sibling itself. */
	pid_t system_tid;
	/* Posted by the sibling once it has started. */
	sem_t *started;
	/* Condition/mutex pair the sibling waits on before proceeding. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the sibling installs prog itself before waiting. */
	int diverge;
	/* Number of condition wake-ups the sibling consumes. */
	int num_waits;
	/* Filter program used when diverge is set. */
	struct sock_fprog *prog;
	struct __test_metadata *metadata;
};

/* Per-test state for the TSYNC fixture. */
FIXTURE_DATA(TSYNC) {
	/* Heap copies of the two filter programs built in FIXTURE_SETUP. */
	struct sock_fprog root_prog, apply_prog;
	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	/* Synchronization objects shared with every sibling. */
	sem_t started;
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	/* Number of siblings the test has seen start. */
	int sibling_count;
};
1564
1565 FIXTURE_SETUP(TSYNC)
1566 {
1567         struct sock_filter root_filter[] = {
1568                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1569         };
1570         struct sock_filter apply_filter[] = {
1571                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1572                         offsetof(struct seccomp_data, nr)),
1573                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1574                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1575                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1576         };
1577
1578         memset(&self->root_prog, 0, sizeof(self->root_prog));
1579         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1580         memset(&self->sibling, 0, sizeof(self->sibling));
1581         self->root_prog.filter = malloc(sizeof(root_filter));
1582         ASSERT_NE(NULL, self->root_prog.filter);
1583         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1584         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1585
1586         self->apply_prog.filter = malloc(sizeof(apply_filter));
1587         ASSERT_NE(NULL, self->apply_prog.filter);
1588         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1589         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1590
1591         self->sibling_count = 0;
1592         pthread_mutex_init(&self->mutex, NULL);
1593         pthread_cond_init(&self->cond, NULL);
1594         sem_init(&self->started, 0, 0);
1595         self->sibling[0].tid = 0;
1596         self->sibling[0].cond = &self->cond;
1597         self->sibling[0].started = &self->started;
1598         self->sibling[0].mutex = &self->mutex;
1599         self->sibling[0].diverge = 0;
1600         self->sibling[0].num_waits = 1;
1601         self->sibling[0].prog = &self->root_prog;
1602         self->sibling[0].metadata = _metadata;
1603         self->sibling[1].tid = 0;
1604         self->sibling[1].cond = &self->cond;
1605         self->sibling[1].started = &self->started;
1606         self->sibling[1].mutex = &self->mutex;
1607         self->sibling[1].diverge = 0;
1608         self->sibling[1].prog = &self->root_prog;
1609         self->sibling[1].num_waits = 1;
1610         self->sibling[1].metadata = _metadata;
1611 }
1612
1613 FIXTURE_TEARDOWN(TSYNC)
1614 {
1615         int sib = 0;
1616
1617         if (self->root_prog.filter)
1618                 free(self->root_prog.filter);
1619         if (self->apply_prog.filter)
1620                 free(self->apply_prog.filter);
1621
1622         for ( ; sib < self->sibling_count; ++sib) {
1623                 struct tsync_sibling *s = &self->sibling[sib];
1624                 void *status;
1625
1626                 if (!s->tid)
1627                         continue;
1628                 if (pthread_kill(s->tid, 0)) {
1629                         pthread_cancel(s->tid);
1630                         pthread_join(s->tid, &status);
1631                 }
1632         }
1633         pthread_mutex_destroy(&self->mutex);
1634         pthread_cond_destroy(&self->cond);
1635         sem_destroy(&self->started);
1636 }
1637
1638 void *tsync_sibling(void *data)
1639 {
1640         long ret = 0;
1641         struct tsync_sibling *me = data;
1642
1643         me->system_tid = syscall(__NR_gettid);
1644
1645         pthread_mutex_lock(me->mutex);
1646         if (me->diverge) {
1647                 /* Just re-apply the root prog to fork the tree */
1648                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
1649                                 me->prog, 0, 0);
1650         }
1651         sem_post(me->started);
1652         /* Return outside of started so parent notices failures. */
1653         if (ret) {
1654                 pthread_mutex_unlock(me->mutex);
1655                 return (void *)SIBLING_EXIT_FAILURE;
1656         }
1657         do {
1658                 pthread_cond_wait(me->cond, me->mutex);
1659                 me->num_waits = me->num_waits - 1;
1660         } while (me->num_waits);
1661         pthread_mutex_unlock(me->mutex);
1662
1663         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1664         if (!ret)
1665                 return (void *)SIBLING_EXIT_NEWPRIVS;
1666         read(0, NULL, 0);
1667         return (void *)SIBLING_EXIT_UNKILLED;
1668 }
1669
1670 void tsync_start_sibling(struct tsync_sibling *sibling)
1671 {
1672         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
1673 }
1674
1675 TEST_F(TSYNC, siblings_fail_prctl)
1676 {
1677         long ret;
1678         void *status;
1679         struct sock_filter filter[] = {
1680                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1681                         offsetof(struct seccomp_data, nr)),
1682                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
1683                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
1684                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1685         };
1686         struct sock_fprog prog = {
1687                 .len = (unsigned short)ARRAY_SIZE(filter),
1688                 .filter = filter,
1689         };
1690
1691         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1692                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1693         }
1694
1695         /* Check prctl failure detection by requesting sib 0 diverge. */
1696         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1697         ASSERT_EQ(0, ret) {
1698                 TH_LOG("setting filter failed");
1699         }
1700
1701         self->sibling[0].diverge = 1;
1702         tsync_start_sibling(&self->sibling[0]);
1703         tsync_start_sibling(&self->sibling[1]);
1704
1705         while (self->sibling_count < TSYNC_SIBLINGS) {
1706                 sem_wait(&self->started);
1707                 self->sibling_count++;
1708         }
1709
1710         /* Signal the threads to clean up*/
1711         pthread_mutex_lock(&self->mutex);
1712         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1713                 TH_LOG("cond broadcast non-zero");
1714         }
1715         pthread_mutex_unlock(&self->mutex);
1716
1717         /* Ensure diverging sibling failed to call prctl. */
1718         pthread_join(self->sibling[0].tid, &status);
1719         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
1720         pthread_join(self->sibling[1].tid, &status);
1721         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1722 }
1723
1724 TEST_F(TSYNC, two_siblings_with_ancestor)
1725 {
1726         long ret;
1727         void *status;
1728
1729         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1730                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1731         }
1732
1733         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1734         ASSERT_EQ(0, ret) {
1735                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1736         }
1737         tsync_start_sibling(&self->sibling[0]);
1738         tsync_start_sibling(&self->sibling[1]);
1739
1740         while (self->sibling_count < TSYNC_SIBLINGS) {
1741                 sem_wait(&self->started);
1742                 self->sibling_count++;
1743         }
1744
1745         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1746                       &self->apply_prog);
1747         ASSERT_EQ(0, ret) {
1748                 TH_LOG("Could install filter on all threads!");
1749         }
1750         /* Tell the siblings to test the policy */
1751         pthread_mutex_lock(&self->mutex);
1752         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1753                 TH_LOG("cond broadcast non-zero");
1754         }
1755         pthread_mutex_unlock(&self->mutex);
1756         /* Ensure they are both killed and don't exit cleanly. */
1757         pthread_join(self->sibling[0].tid, &status);
1758         EXPECT_EQ(0x0, (long)status);
1759         pthread_join(self->sibling[1].tid, &status);
1760         EXPECT_EQ(0x0, (long)status);
1761 }
1762
1763 TEST_F(TSYNC, two_sibling_want_nnp)
1764 {
1765         void *status;
1766
1767         /* start siblings before any prctl() operations */
1768         tsync_start_sibling(&self->sibling[0]);
1769         tsync_start_sibling(&self->sibling[1]);
1770         while (self->sibling_count < TSYNC_SIBLINGS) {
1771                 sem_wait(&self->started);
1772                 self->sibling_count++;
1773         }
1774
1775         /* Tell the siblings to test no policy */
1776         pthread_mutex_lock(&self->mutex);
1777         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1778                 TH_LOG("cond broadcast non-zero");
1779         }
1780         pthread_mutex_unlock(&self->mutex);
1781
1782         /* Ensure they are both upset about lacking nnp. */
1783         pthread_join(self->sibling[0].tid, &status);
1784         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1785         pthread_join(self->sibling[1].tid, &status);
1786         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1787 }
1788
1789 TEST_F(TSYNC, two_siblings_with_no_filter)
1790 {
1791         long ret;
1792         void *status;
1793
1794         /* start siblings before any prctl() operations */
1795         tsync_start_sibling(&self->sibling[0]);
1796         tsync_start_sibling(&self->sibling[1]);
1797         while (self->sibling_count < TSYNC_SIBLINGS) {
1798                 sem_wait(&self->started);
1799                 self->sibling_count++;
1800         }
1801
1802         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1803                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1804         }
1805
1806         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1807                       &self->apply_prog);
1808         ASSERT_EQ(0, ret) {
1809                 TH_LOG("Could install filter on all threads!");
1810         }
1811
1812         /* Tell the siblings to test the policy */
1813         pthread_mutex_lock(&self->mutex);
1814         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1815                 TH_LOG("cond broadcast non-zero");
1816         }
1817         pthread_mutex_unlock(&self->mutex);
1818
1819         /* Ensure they are both killed and don't exit cleanly. */
1820         pthread_join(self->sibling[0].tid, &status);
1821         EXPECT_EQ(0x0, (long)status);
1822         pthread_join(self->sibling[1].tid, &status);
1823         EXPECT_EQ(0x0, (long)status);
1824 }
1825
1826 TEST_F(TSYNC, two_siblings_with_one_divergence)
1827 {
1828         long ret;
1829         void *status;
1830
1831         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1832                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1833         }
1834
1835         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1836         ASSERT_EQ(0, ret) {
1837                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1838         }
1839         self->sibling[0].diverge = 1;
1840         tsync_start_sibling(&self->sibling[0]);
1841         tsync_start_sibling(&self->sibling[1]);
1842
1843         while (self->sibling_count < TSYNC_SIBLINGS) {
1844                 sem_wait(&self->started);
1845                 self->sibling_count++;
1846         }
1847
1848         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1849                       &self->apply_prog);
1850         ASSERT_EQ(self->sibling[0].system_tid, ret) {
1851                 TH_LOG("Did not fail on diverged sibling.");
1852         }
1853
1854         /* Wake the threads */
1855         pthread_mutex_lock(&self->mutex);
1856         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1857                 TH_LOG("cond broadcast non-zero");
1858         }
1859         pthread_mutex_unlock(&self->mutex);
1860
1861         /* Ensure they are both unkilled. */
1862         pthread_join(self->sibling[0].tid, &status);
1863         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1864         pthread_join(self->sibling[1].tid, &status);
1865         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1866 }
1867
1868 TEST_F(TSYNC, two_siblings_not_under_filter)
1869 {
1870         long ret, sib;
1871         void *status;
1872
1873         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1874                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1875         }
1876
1877         /*
1878          * Sibling 0 will have its own seccomp policy
1879          * and Sibling 1 will not be under seccomp at
1880          * all. Sibling 1 will enter seccomp and 0
1881          * will cause failure.
1882          */
1883         self->sibling[0].diverge = 1;
1884         tsync_start_sibling(&self->sibling[0]);
1885         tsync_start_sibling(&self->sibling[1]);
1886
1887         while (self->sibling_count < TSYNC_SIBLINGS) {
1888                 sem_wait(&self->started);
1889                 self->sibling_count++;
1890         }
1891
1892         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1893         ASSERT_EQ(0, ret) {
1894                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1895         }
1896
1897         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1898                       &self->apply_prog);
1899         ASSERT_EQ(ret, self->sibling[0].system_tid) {
1900                 TH_LOG("Did not fail on diverged sibling.");
1901         }
1902         sib = 1;
1903         if (ret == self->sibling[0].system_tid)
1904                 sib = 0;
1905
1906         pthread_mutex_lock(&self->mutex);
1907
1908         /* Increment the other siblings num_waits so we can clean up
1909          * the one we just saw.
1910          */
1911         self->sibling[!sib].num_waits += 1;
1912
1913         /* Signal the thread to clean up*/
1914         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1915                 TH_LOG("cond broadcast non-zero");
1916         }
1917         pthread_mutex_unlock(&self->mutex);
1918         pthread_join(self->sibling[sib].tid, &status);
1919         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1920         /* Poll for actual task death. pthread_join doesn't guarantee it. */
1921         while (!kill(self->sibling[sib].system_tid, 0))
1922                 sleep(0.1);
1923         /* Switch to the remaining sibling */
1924         sib = !sib;
1925
1926         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1927                       &self->apply_prog);
1928         ASSERT_EQ(0, ret) {
1929                 TH_LOG("Expected the remaining sibling to sync");
1930         };
1931
1932         pthread_mutex_lock(&self->mutex);
1933
1934         /* If remaining sibling didn't have a chance to wake up during
1935          * the first broadcast, manually reduce the num_waits now.
1936          */
1937         if (self->sibling[sib].num_waits > 1)
1938                 self->sibling[sib].num_waits = 1;
1939         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1940                 TH_LOG("cond broadcast non-zero");
1941         }
1942         pthread_mutex_unlock(&self->mutex);
1943         pthread_join(self->sibling[sib].tid, &status);
1944         EXPECT_EQ(0, (long)status);
1945         /* Poll for actual task death. pthread_join doesn't guarantee it. */
1946         while (!kill(self->sibling[sib].system_tid, 0))
1947                 sleep(0.1);
1948
1949         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1950                       &self->apply_prog);
1951         ASSERT_EQ(0, ret);  /* just us chickens */
1952 }
1953
1954 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
     /*
      * A forked child makes the parent its tracer, installs a filter that
      * returns SECCOMP_RET_TRACE for poll (data 0x100) and for
      * restart_syscall (data 0x200), and then blocks in poll() on a pipe.
      * The parent interrupts that poll with SIGSTOP, resumes the child with
      * SIGCONT, and expects the next seccomp trace event to carry the 0x200
      * message.  Two bytes written by the parent ('.' and '!') step the
      * child through its read()/poll()/read() sequence.
      */
1955 TEST(syscall_restart)
1956 {
1957         long ret;
1958         unsigned long msg;
1959         pid_t child_pid;
1960         int pipefd[2];
1961         int status;
1962         siginfo_t info = { };
             /*
              * Jump offsets are relative to the next instruction: sigreturn,
              * read, exit and rt_sigreturn all land on the ALLOW return; poll
              * and restart_syscall land on their own TRACE returns; write
              * falls into ALLOW and everything else reaches KILL.
              */
1963         struct sock_filter filter[] = {
                     /* Load the syscall number for the comparisons below. */
1964                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1965                          offsetof(struct seccomp_data, nr)),
1966
1967 #ifdef __NR_sigreturn
1968                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
1969 #endif
1970                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
1971                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
1972                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
1973                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_poll, 4, 0),
1974                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
1975
1976                 /* Allow __NR_write for easy logging. */
1977                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
1978                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1979                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1980                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), /* poll */
1981                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), /* restart */
1982         };
1983         struct sock_fprog prog = {
1984                 .len = (unsigned short)ARRAY_SIZE(filter),
1985                 .filter = filter,
1986         };
1987
             /* The pipe carries the parent's sync bytes to the child. */
1988         ASSERT_EQ(0, pipe(pipefd));
1989
1990         child_pid = fork();
1991         ASSERT_LE(0, child_pid);
1992         if (child_pid == 0) {
1993                 /* Child uses EXPECT not ASSERT to deliver status correctly. */
1994                 char buf = ' ';
1995                 struct pollfd fds = {
1996                         .fd = pipefd[0],
1997                         .events = POLLIN,
1998                 };
1999
2000                 /* Attach parent as tracer and stop. */
2001                 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2002                 EXPECT_EQ(0, raise(SIGSTOP));
2003
                     /* The child only reads; drop its copy of the write end. */
2004                 EXPECT_EQ(0, close(pipefd[1]));
2005
2006                 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2007                         TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2008                 }
2009
2010                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2011                 EXPECT_EQ(0, ret) {
2012                         TH_LOG("Failed to install filter!");
2013                 }
2014
                     /* First sync byte: the parent writes '.' after releasing us. */
2015                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2016                         TH_LOG("Failed to read() sync from parent");
2017                 }
2018                 EXPECT_EQ('.', buf) {
2019                         TH_LOG("Failed to get sync data from read()");
2020                 }
2021
2022                 /* Start poll to be interrupted. */
2023                 errno = 0;
                     /* Timeout of -1: wait until the pipe becomes readable. */
2024                 EXPECT_EQ(1, poll(&fds, 1, -1)) {
2025                         TH_LOG("Call to poll() failed (errno %d)", errno);
2026                 }
2027
2028                 /* Read final sync from parent. */
2029                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2030                         TH_LOG("Failed final read() from parent");
2031                 }
2032                 EXPECT_EQ('!', buf) {
2033                         TH_LOG("Failed to get final data from read()");
2034                 }
2035
2036                 /* Directly report the status of our test harness results. */
2037                 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2038                                                      : EXIT_FAILURE);
2039         }
             /* Parent from here on: it only writes, so drop the read end. */
2040         EXPECT_EQ(0, close(pipefd[0]));
2041
2042         /* Attach to child, setup options, and release. */
2043         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2044         ASSERT_EQ(true, WIFSTOPPED(status));
2045         ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2046                             PTRACE_O_TRACESECCOMP));
2047         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
             /* Satisfies the child's first read(), which expects '.'. */
2048         ASSERT_EQ(1, write(pipefd[1], ".", 1));
2049
2050         /* Wait for poll() to start. */
2051         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2052         ASSERT_EQ(true, WIFSTOPPED(status));
2053         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
             /* The ptrace event code is checked in the bits above bit 15. */
2054         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2055         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
             /* 0x100 is the data value of the filter's poll TRACE return. */
2056         ASSERT_EQ(0x100, msg);
2057         EXPECT_EQ(__NR_poll, get_syscall(_metadata, child_pid));
2058
2059         /* Might as well check siginfo for sanity while we're here. */
2060         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2061         ASSERT_EQ(SIGTRAP, info.si_signo);
2062         ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2063         EXPECT_EQ(0, info.si_errno);
2064         EXPECT_EQ(getuid(), info.si_uid);
2065         /* Verify signal delivery came from child (seccomp-triggered). */
2066         EXPECT_EQ(child_pid, info.si_pid);
2067
2068         /* Interrupt poll with SIGSTOP (which we'll need to handle). */
2069         ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2070         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2071         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2072         ASSERT_EQ(true, WIFSTOPPED(status));
2073         ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2074         /* Verify signal delivery came from parent now. */
2075         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2076         EXPECT_EQ(getpid(), info.si_pid);
2077
2078         /* Restart poll with SIGCONT, which triggers restart_syscall. */
2079         ASSERT_EQ(0, kill(child_pid, SIGCONT));
2080         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
             /* The SIGCONT itself is reported as a stop before the child runs on. */
2081         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2082         ASSERT_EQ(true, WIFSTOPPED(status));
2083         ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2084         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2085
2086         /* Wait for restart_syscall() to start. */
2087         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2088         ASSERT_EQ(true, WIFSTOPPED(status));
2089         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2090         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2091         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
             /* 0x200 is the data value of the filter's restart_syscall TRACE return. */
2092         ASSERT_EQ(0x200, msg);
2093         ret = get_syscall(_metadata, child_pid);
2094 #if defined(__arm__)
2095         /* FIXME: ARM does not expose true syscall in registers. */
2096         EXPECT_EQ(__NR_poll, ret);
2097 #else
2098         EXPECT_EQ(__NR_restart_syscall, ret);
2099 #endif
2100
2101         /* Write again to end poll. */
2102         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
             /* '!' makes the pipe readable and is what the child's final read() expects. */
2103         ASSERT_EQ(1, write(pipefd[1], "!", 1));
2104         EXPECT_EQ(0, close(pipefd[1]));
2105
             /* Fail the test if the child was killed by a signal or exited non-zero. */
2106         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2107         if (WIFSIGNALED(status) || WEXITSTATUS(status))
2108                 _metadata->passed = 0;
2109 }
2110
2111 /*
2112  * TODO:
2113  * - add microbenchmarks
2114  * - expand NNP testing
2115  * - better arch-specific TRACE and TRAP handlers.
2116  * - endianness checking when appropriate
2117  * - 64-bit arg prodding
2118  * - arch value testing (x86 modes especially)
2119  * - ...
2120  */
2121
     /*
      * NOTE(review): presumably expands to the program entry point supplied by
      * test_harness.h, which runs the TEST()/TEST_F() cases defined above --
      * confirm against the harness header.
      */
2122 TEST_HARNESS_MAIN