kore

Kore is a web application platform for writing scalable, concurrent web-based processes in C or Python.
Commits | Files | Refs | README | LICENSE | git clone https://git.kore.io/kore.git

commit 8311c036d9c11f182522a69174c264ffeed88fb3
parent 16afcb66d0ef3ad421ac312e4e9bad2dabc9a25c
Author: Joris Vink <joris@coders.se>
Date:   Thu, 31 Oct 2019 12:52:10 +0100

Add seccomp_tracing configuration option for Linux.

If set to "yes", Kore will trace its child processes and properly
notify you of seccomp violations while still allowing the syscalls.

This can be very useful when running Kore on new platforms that have
not been properly tested with seccomp, allowing me to adjust the default
policies as we move forward.

Diffstat:
include/kore/kore.h | 3+++
include/kore/seccomp.h | 5+++++
src/config.c | 29+++++++++++++++++++++++++++++
src/seccomp.c | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
src/worker.c | 77+++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
5 files changed, 182 insertions(+), 49 deletions(-)

diff --git a/include/kore/kore.h b/include/kore/kore.h @@ -436,6 +436,9 @@ struct kore_alog_header { struct kore_worker { u_int8_t id; u_int8_t cpu; +#if defined(__linux__) + int tracing; +#endif pid_t pid; int pipe[2]; struct connection *msg[2]; diff --git a/include/kore/seccomp.h b/include/kore/seccomp.h @@ -158,12 +158,17 @@ KORE_FILTER_LEN(_scfilt)); \ } +extern int kore_seccomp_tracing; + void kore_seccomp_init(void); void kore_seccomp_drop(void); void kore_seccomp_enable(void); +void kore_seccomp_traceme(void); int kore_seccomp_syscall_resolve(const char *); +int kore_seccomp_trace(struct kore_worker *, int); int kore_seccomp_filter(const char *, void *, size_t); +const char *kore_seccomp_syscall_name(long); struct sock_filter *kore_seccomp_syscall_filter(const char *, int); struct sock_filter *kore_seccomp_syscall_arg(const char *, int, int, int); struct sock_filter *kore_seccomp_syscall_flag(const char *, int, int, int); diff --git a/src/config.c b/src/config.c @@ -45,6 +45,10 @@ #include "curl.h" #endif +#if defined(__linux__) +#include "seccomp.h" +#endif + /* XXX - This is becoming a clusterfuck. Fix it. 
*/ static int configure_load(char *); @@ -142,6 +146,10 @@ static int configure_curl_timeout(char *); static int configure_curl_recv_max(char *); #endif +#if defined(__linux__) +static int configure_seccomp_tracing(char *); +#endif + static struct { const char *name; int (*configure)(char *); @@ -204,6 +212,9 @@ static struct { #if defined(KORE_USE_PLATFORM_PLEDGE) { "pledge", configure_add_pledge }, #endif +#if defined(__linux__) + { "seccomp_tracing", configure_seccomp_tracing }, +#endif #if !defined(KORE_NO_HTTP) { "filemap_ext", configure_filemap_ext }, { "filemap_index", configure_filemap_index }, @@ -1722,3 +1733,21 @@ configure_curl_timeout(char *option) return (KORE_RESULT_OK); } #endif + +#if defined(__linux__) +static int +configure_seccomp_tracing(char *opt) +{ + if (!strcmp(opt, "yes")) { + kore_seccomp_tracing = 1; + } else if (!strcmp(opt, "no")) { + kore_seccomp_tracing = 0; + } else { + printf("bad seccomp_tracing value: %s (expected yes|no)\n", + opt); + return (KORE_RESULT_ERROR); + } + + return (KORE_RESULT_OK); +} +#endif diff --git a/src/seccomp.c b/src/seccomp.c @@ -17,7 +17,9 @@ #include <sys/param.h> #include <sys/mman.h> #include <sys/epoll.h> +#include <sys/ptrace.h> #include <sys/prctl.h> +#include <sys/reg.h> #include <sys/syscall.h> #include <linux/seccomp.h> @@ -36,12 +38,8 @@ #endif #if !defined(SECCOMP_KILL_POLICY) -#if defined(KORE_DEBUG) -#define SECCOMP_KILL_POLICY SECCOMP_RET_TRAP -#else #define SECCOMP_KILL_POLICY SECCOMP_RET_KILL #endif -#endif /* * The bare minimum to be able to run kore. 
These are added last and can @@ -140,9 +138,7 @@ static struct sock_filter *seccomp_filter_update(struct sock_filter *, #define filter_prologue_len KORE_FILTER_LEN(filter_prologue) #define filter_epilogue_len KORE_FILTER_LEN(filter_epilogue) -#if defined(KORE_DEBUG) -static void seccomp_trap(int sig, siginfo_t *, void *); -#endif +static void seccomp_register_violation(struct kore_worker *); struct filter { char *name; @@ -154,6 +150,12 @@ struct filter { static TAILQ_HEAD(, filter) filters; static struct filter *ufilter = NULL; +/* + * If enabled will instruct the parent process to ptrace its children and + * log any seccomp SECCOMP_RET_TRACE rule. + */ +int kore_seccomp_tracing = 0; + void kore_seccomp_init(void) { @@ -181,26 +183,20 @@ kore_seccomp_drop(void) void kore_seccomp_enable(void) { -#if defined(KORE_DEBUG) - struct sigaction sa; -#endif struct sock_filter *sf; struct sock_fprog prog; struct kore_runtime_call *rcall; struct filter *filter; size_t prog_len, off, i; -#if defined(KORE_DEBUG) - memset(&sa, 0, sizeof(sa)); - - sa.sa_flags = SA_SIGINFO; - sa.sa_sigaction = seccomp_trap; - - if (sigfillset(&sa.sa_mask) == -1) - fatalx("sigfillset: %s", errno_s); - if (sigaction(SIGSYS, &sa, NULL) == -1) - fatalx("sigaction: %s", errno_s); -#endif + /* + * If kore_seccomp_tracing is turned on, set the default policy to + * SECCOMP_RET_TRACE so we can log the system calls. 
+ */ + if (kore_seccomp_tracing) { + filter_epilogue[0].k = SECCOMP_RET_TRACE; + kore_log(LOG_NOTICE, "seccomp tracing enabled"); + } #if defined(KORE_USE_PYTHON) ufilter = TAILQ_FIRST(&filters); @@ -243,10 +239,6 @@ kore_seccomp_enable(void) TAILQ_FOREACH(filter, &filters, list) { for (i = 0; i < filter->instructions; i++) sf[off++] = filter->prog[i]; -#if defined(KORE_DEBUG) - kore_log(LOG_INFO, - "seccomp filter '%s' added", filter->name); -#endif } for (i = 0; i < filter_epilogue_len; i++) @@ -292,6 +284,55 @@ kore_seccomp_filter(const char *name, void *prog, size_t len) return (KORE_RESULT_OK); } +void +kore_seccomp_traceme(void) +{ + if (kore_seccomp_tracing == 0) + return; + + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) + fatalx("ptrace. %s", errno_s); + if (kill(worker->pid, SIGSTOP) == -1) + fatalx("kill: %s", errno_s); +} + +int +kore_seccomp_trace(struct kore_worker *kw, int status) +{ + if (kore_seccomp_tracing == 0) + return (KORE_RESULT_ERROR); + + if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) { + if (kw->tracing == 0) { + kw->tracing = 1; + if (ptrace(PTRACE_SETOPTIONS, kw->pid, NULL, + PTRACE_O_TRACESECCOMP) == -1) + fatal("ptrace: %s", errno_s); + if (ptrace(PTRACE_CONT, kw->pid, NULL, NULL) == -1) + fatal("ptrace: %s", errno_s); + } + + return (KORE_RESULT_OK); + } + + if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { + if ((status >> 8) == + (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) + seccomp_register_violation(kw); + if (ptrace(PTRACE_CONT, kw->pid, NULL, NULL) == -1) + fatal("ptrace: %s", errno_s); + return (KORE_RESULT_OK); + } + + if (WIFSTOPPED(status) && kw->tracing) { + if (ptrace(PTRACE_CONT, kw->pid, NULL, WSTOPSIG(status)) == -1) + fatal("ptrace: %s", errno_s); + return (KORE_RESULT_OK); + } + + return (KORE_RESULT_ERROR); +} + int kore_seccomp_syscall_resolve(const char *name) { @@ -305,6 +346,19 @@ kore_seccomp_syscall_resolve(const char *name) return (-1); } +const char * +kore_seccomp_syscall_name(long sysnr) +{ 
+ int i; + + for (i = 0; kore_syscall_map[i].name != NULL; i++) { + if (kore_syscall_map[i].nr == sysnr) + return (kore_syscall_map[i].name); + } + + return ("unknown"); +} + struct sock_filter * kore_seccomp_syscall_filter(const char *name, int action) { @@ -349,13 +403,18 @@ kore_seccomp_syscall_flag(const char *name, int action, int arg, int value) return (seccomp_filter_update(filter, name, KORE_FILTER_LEN(filter))); } -#if defined(KORE_DEBUG) static void -seccomp_trap(int sig, siginfo_t *info, void *ucontext) +seccomp_register_violation(struct kore_worker *kw) { - kore_log(LOG_INFO, "sandbox violation - syscall=%d", info->si_syscall); + long sysnr; + + if ((sysnr = ptrace(PTRACE_PEEKUSER, kw->pid, + sizeof(long) * ORIG_RAX, NULL)) == -1) + fatal("ptrace: %s", errno_s); + + kore_log(LOG_INFO, "seccomp violation, worker=%d, syscall=%s", + kw->id, kore_seccomp_syscall_name(sysnr)); } -#endif static struct sock_filter * seccomp_filter_update(struct sock_filter *filter, const char *name, size_t elm) diff --git a/src/worker.c b/src/worker.c @@ -51,6 +51,10 @@ #include "curl.h" #endif +#if defined(__linux__) +#include "seccomp.h" +#endif + #if !defined(WAIT_ANY) #define WAIT_ANY (-1) #endif @@ -68,6 +72,7 @@ struct wlock { static int worker_trylock(void); static void worker_unlock(void); +static void worker_reaper(pid_t, int); static inline int worker_acceptlock_obtain(void); static inline void worker_acceptlock_release(void); @@ -207,7 +212,19 @@ kore_worker_shutdown(void) pid = waitpid(kw->pid, &status, 0); if (pid == -1) continue; - kw->pid = 0; + +#if defined(__linux__) + kore_seccomp_trace(kw, status); +#endif + + if (WIFEXITED(status)) { + kw->pid = 0; + + if (!kore_quiet) { + kore_log(LOG_NOTICE, + "worker %d exited", kw->id); + } + } } } @@ -319,6 +336,10 @@ kore_worker_entry(struct kore_worker *kw) worker = kw; +#if defined(__linux__) + kore_seccomp_traceme(); +#endif + (void)snprintf(buf, sizeof(buf), "[wrk %d]", kw->id); if (kw->id == KORE_WORKER_KEYMGR) 
(void)snprintf(buf, sizeof(buf), "[keymgr]"); @@ -488,6 +509,7 @@ kore_worker_entry(struct kore_worker *kw) kore_free(rcall); } + kore_msg_send(KORE_MSG_PARENT, KORE_MSG_SHUTDOWN, NULL, 0); kore_server_cleanup(); kore_platform_event_cleanup(); @@ -516,10 +538,7 @@ kore_worker_entry(struct kore_worker *kw) void kore_worker_reap(void) { - u_int16_t id; pid_t pid; - struct kore_worker *kw; - const char *func; int status; for (;;) { @@ -535,23 +554,55 @@ kore_worker_reap(void) return; } - break; + if (pid == 0) + return; + + worker_reaper(pid, status); } +} - if (pid == 0) +void +kore_worker_make_busy(void) +{ + if (worker_count == WORKER_SOLO_COUNT || worker_no_lock == 1) return; + if (worker->has_lock) { + worker_unlock(); + worker->has_lock = 0; + kore_msg_send(KORE_MSG_WORKER_ALL, + KORE_MSG_ACCEPT_AVAILABLE, NULL, 0); + } +} + +static void +worker_reaper(pid_t pid, int status) +{ + u_int16_t id; + struct kore_worker *kw; + const char *func; + for (id = 0; id < worker_count; id++) { kw = WORKER(id); if (kw->pid != pid) continue; +#if defined(__linux__) + if (kore_seccomp_trace(kw, status)) + break; +#endif + if (!kore_quiet) { kore_log(LOG_NOTICE, "worker %d (%d) exited with status %d", kw->id, pid, status); } + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + kw->pid = 0; + break; + } + func = "none"; #if !defined(KORE_NO_HTTP) if (kw->active_hdlr != NULL) @@ -613,20 +664,6 @@ kore_worker_reap(void) } } -void -kore_worker_make_busy(void) -{ - if (worker_count == WORKER_SOLO_COUNT || worker_no_lock == 1) - return; - - if (worker->has_lock) { - worker_unlock(); - worker->has_lock = 0; - kore_msg_send(KORE_MSG_WORKER_ALL, - KORE_MSG_ACCEPT_AVAILABLE, NULL, 0); - } -} - static inline void worker_acceptlock_release(void) {