If a seccomp filter program is installed, older static binaries and
distributions with older libc implementations (glibc 2.13 and earlier)
that rely on vsyscall use will be terminated regardless of the filter
program policy when executing time, gettimeofday, or getcpu. This is
only the case when vsyscall emulation is in use (vsyscall=emulate is the
default).
This patch emulates system call entry inside a vsyscall=emulate by
populating regs->ax and regs->orig_ax with the system call number prior
to calling into seccomp such that all seccomp-dependencies function
normally. Additionally, system call return behavior is emulated in line
with other vsyscall entrypoints for the trace/trap cases.
[ v2: fixed ip and sp on SECCOMP_RET_TRAP/TRACE (thanks to luto@mit.edu) ]
Reported-and-tested-by: Owen Kibel <qmewlo@gmail.com>
Signed-off-by: Will Drewry <wad@chromium.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
+{
+ if (!seccomp_mode(&tsk->seccomp))
+ return 0;
+ task_pt_regs(tsk)->orig_ax = syscall_nr;
+ task_pt_regs(tsk)->ax = syscall_nr;
+ return __secure_computing(syscall_nr);
+}
+
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
/*
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
/*
int vsyscall_nr;
int prev_sig_on_uaccess_error;
long ret;
int vsyscall_nr;
int prev_sig_on_uaccess_error;
long ret;
/*
* No point in checking CS -- the only way to get here is a user mode
/*
* No point in checking CS -- the only way to get here is a user mode
- if (seccomp_mode(&tsk->seccomp))
- do_exit(SIGKILL);
-
/*
* With a real vsyscall, page faults cause SIGSEGV. We want to
* preserve that behavior to make writing exploits harder.
/*
* With a real vsyscall, page faults cause SIGSEGV. We want to
* preserve that behavior to make writing exploits harder.
* address 0".
*/
ret = -EFAULT;
* address 0".
*/
ret = -EFAULT;
switch (vsyscall_nr) {
case 0:
switch (vsyscall_nr) {
case 0:
+ skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
+ if (skip)
+ break;
+
if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
!write_ok_or_segv(regs->si, sizeof(struct timezone)))
break;
if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
!write_ok_or_segv(regs->si, sizeof(struct timezone)))
break;
+ skip = vsyscall_seccomp(tsk, __NR_time);
+ if (skip)
+ break;
+
if (!write_ok_or_segv(regs->di, sizeof(time_t)))
break;
if (!write_ok_or_segv(regs->di, sizeof(time_t)))
break;
+ skip = vsyscall_seccomp(tsk, __NR_getcpu);
+ if (skip)
+ break;
+
if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
!write_ok_or_segv(regs->si, sizeof(unsigned)))
break;
if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
!write_ok_or_segv(regs->si, sizeof(unsigned)))
break;
current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+ if (skip) {
+ if ((long)regs->ax <= 0L) /* seccomp errno emulation */
+ goto do_ret;
+ goto done; /* seccomp trace/trap */
+ }
+
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
/* Emulate a ret instruction. */
regs->ip = caller;
regs->sp += 8;
/* Emulate a ret instruction. */
regs->ip = caller;
regs->sp += 8;