kore

Kore is a web application platform for writing scalable, concurrent web-based processes in C or Python.
Commits | Files | Refs | README | LICENSE | git clone https://git.kore.io/kore.git

commit 296fe7a6d43959aadefefa630ef1c476feee939a
parent 4ed6136693e0519c1ed5e7684e7ca0882628bd2a
Author: Joris Vink <joris@coders.se>
Date:   Thu, 26 Sep 2019 13:51:53 +0200

seccomp improvements.

More BPF helper macros, more helpers for granular syscall checking.
Use these throughout kore where it makes sense.

The new helpers:

- KORE_SYSCALL_DENY_ARG(name, arg, value, errno):
	Deny the system call with errno if the argument matches value.

- KORE_SYSCALL_DENY_MASK(name, arg, mask, errno):
	Deny the system call with errno if the mask argument does not match
	the exact mask given.

- KORE_SYSCALL_DENY_WITH_FLAG(name, arg, flag, errno):
	Deny the system call with errno if the argument contains the
	given flag.

The reverse also exists:

- KORE_SYSCALL_ALLOW_ARG()
- KORE_SYSCALL_ALLOW_MASK()
- KORE_SYSCALL_ALLOW_WITH_FLAG()

Diffstat:
include/kore/seccomp.h | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
src/curl.c | 4+++-
src/pgsql.c | 9++++++---
src/seccomp.c | 22++++++++++++++--------
4 files changed, 124 insertions(+), 23 deletions(-)

diff --git a/include/kore/seccomp.h b/include/kore/seccomp.h @@ -23,22 +23,113 @@ #include <linux/filter.h> #include <linux/seccomp.h> +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define ARGS_LO_OFFSET 0 +#define ARGS_HI_OFFSET sizeof(u_int32_t) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define ARGS_LO_OFFSET sizeof(u_int32_t) +#define ARGS_HI_OFFSET 0 +#else +#error "__BYTE_ORDER unknown" +#endif + /* Do something with a syscall with a user-supplied action. */ -#define KORE_SYSCALL_FILTER(_name, _action) \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SYS_##_name, 0, 1), \ - BPF_STMT(BPF_RET+BPF_K, _action) +#define KORE_SYSCALL_FILTER(_name, _action) \ + KORE_BPF_CMP(SYS_##_name, 0, 1), \ + KORE_BPF_RET(_action) + +/* + * Check if a system call is called with the supplied value as argument. + * + * This is checked in 2 steps due to args being 64-bit and the accumulator + * only being a 32-bit register. + * + * If true we return the given action, otherwise nothing happens. + */ +#define KORE_SYSCALL_ARG(_name, _arg, _val, _action) \ + KORE_BPF_CMP(SYS_##_name, 0, 6), \ + KORE_BPF_LOAD(args[(_arg)], ARGS_LO_OFFSET), \ + KORE_BPF_CMP(((_val) & 0xffffffff), 0, 3), \ + KORE_BPF_LOAD(args[(_arg)], ARGS_HI_OFFSET), \ + KORE_BPF_CMP((((uint32_t)((uint64_t)(_val) >> 32)) & 0xffffffff), 0, 1), \ + KORE_BPF_RET(_action), \ + KORE_BPF_LOAD(nr, 0) + +/* + * Check if a system call is called with the supplied mask as argument. + * + * As KORE_SYSCALL_ARG() this is done in 2 steps. + */ +#define KORE_SYSCALL_MASK(_name, _arg, _mask, _action) \ + KORE_BPF_CMP(SYS_##_name, 0, 8), \ + KORE_BPF_LOAD(args[(_arg)], ARGS_LO_OFFSET), \ + KORE_BPF_AND(~((_mask) & 0xffffffff)), \ + KORE_BPF_CMP(0, 0, 4), \ + KORE_BPF_LOAD(args[(_arg)], ARGS_HI_OFFSET), \ + KORE_BPF_AND(~(((uint32_t)((uint64_t)(_mask) >> 32)) & 0xffffffff)), \ + KORE_BPF_CMP(0, 0, 1), \ + KORE_BPF_RET(_action), \ + KORE_BPF_LOAD(nr, 0) + +/* + * Check if the system call is called with the given value in the argument + * contains the given flag. 
+ */ +#define KORE_SYSCALL_WITH_FLAG(_name, _arg, _flag, _action) \ + KORE_BPF_CMP(SYS_##_name, 0, 8), \ + KORE_BPF_LOAD(args[(_arg)], ARGS_LO_OFFSET), \ + KORE_BPF_AND(((_flag) & 0xffffffff)), \ + KORE_BPF_CMP(((_flag) & 0xffffffff), 0, 4), \ + KORE_BPF_LOAD(args[(_arg)], ARGS_HI_OFFSET), \ + KORE_BPF_AND((((uint32_t)((uint64_t)(_flag) >> 32)) & 0xffffffff)), \ + KORE_BPF_CMP((((uint32_t)((uint64_t)(_flag) >> 32)) & 0xffffffff), 0, 1), \ + KORE_BPF_RET(_action), \ + KORE_BPF_LOAD(nr, 0) -/* Allow a system call completely. */ -#define KORE_SYSCALL_ALLOW(_name) \ +/* Denying of system calls macros (with an errno). */ +#define KORE_SYSCALL_DENY(_name, _errno) \ + KORE_SYSCALL_FILTER(_name, SECCOMP_RET_ERRNO|(_errno)) + +#define KORE_SYSCALL_DENY_ARG(_name, _arg, _val, _errno) \ + KORE_SYSCALL_ARG(_name, _arg, _val, SECCOMP_RET_ERRNO|(_errno)) + +#define KORE_SYSCALL_DENY_MASK(_name, _arg, _val, _errno) \ + KORE_SYSCALL_MASK(_name, _arg, _val, SECCOMP_RET_ERRNO|(_errno)) + +#define KORE_SYSCALL_DENY_WITH_FLAG(_name, _arg, _flag, _errno) \ + KORE_SYSCALL_WITH_FLAG(_name, _arg, _flag, SECCOMP_RET_ERRNO|(_errno)) + +/* Allowing of system call macros. */ +#define KORE_SYSCALL_ALLOW(_name) \ KORE_SYSCALL_FILTER(_name, SECCOMP_RET_ALLOW) -/* Allow system call, but log it. */ -#define KORE_SYSCALL_ALLOW_LOG(_name) \ +#define KORE_SYSCALL_ALLOW_LOG(_name) \ KORE_SYSCALL_FILTER(_name, SECCOMP_RET_LOG) -/* Explicit deny of a system call with an errno code for the caller. */ -#define KORE_SYSCALL_DENY_ERRNO(_name, _errno) \ - KORE_SYSCALL_FILTER(_name, SECCOMP_RET_ERRNO|(_errno)) +#define KORE_SYSCALL_ALLOW_ARG(_name, _arg, _val) \ + KORE_SYSCALL_ARG(_name, _arg, _val, SECCOMP_RET_ALLOW) + +#define KORE_SYSCALL_ALLOW_MASK(_name, _arg, _mask) \ + KORE_SYSCALL_MASK(_name, _arg, _mask, SECCOMP_RET_ALLOW) + +#define KORE_SYSCALL_ALLOW_WITH_FLAG(_name, _arg, _flag) \ + KORE_SYSCALL_WITH_FLAG(_name, _arg, _flag, SECCOMP_RET_ALLOW) + +/* Load field of seccomp_data into accumulator. 
*/ +#define KORE_BPF_LOAD(_field, _off) \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, _field) + _off) + +/* Return a constant from a BPF program. */ +#define KORE_BPF_RET(_retval) \ + BPF_STMT(BPF_RET+BPF_K, _retval) + +/* Compare the accumulator against a constant (==). */ +#define KORE_BPF_CMP(_k, _jt, _jf) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, _k, _jt, _jf) + +/* AND operation on the accumulator. */ +#define KORE_BPF_AND(_k) \ + BPF_STMT(BPF_ALU+BPF_AND+BPF_K, _k) /* The length of a filter. */ #define KORE_FILTER_LEN(x) (sizeof(x) / sizeof(x[0])) @@ -52,7 +143,6 @@ * KORE_SYSCALL_DENY_ERRNO(ioctl, EACCESS), * KORE_SYSCALL_ALLOW(poll), * ); - * */ #define KORE_SECCOMP_FILTER(name, ...) \ struct sock_filter _scfilt[] = { \ diff --git a/src/curl.c b/src/curl.c @@ -28,8 +28,10 @@ static struct sock_filter filter_curl[] = { /* Allow sockets and libcurl to call connect. */ KORE_SYSCALL_ALLOW(bind), - KORE_SYSCALL_ALLOW(socket), KORE_SYSCALL_ALLOW(connect), + KORE_SYSCALL_ALLOW_ARG(socket, 0, AF_INET), + KORE_SYSCALL_ALLOW_ARG(socket, 0, AF_INET6), + KORE_SYSCALL_ALLOW_ARG(socket, 0, AF_UNIX), /* Threading related. */ KORE_SYSCALL_ALLOW(clone), diff --git a/src/pgsql.c b/src/pgsql.c @@ -32,10 +32,13 @@ #include "seccomp.h" static struct sock_filter filter_pgsql[] = { - KORE_SYSCALL_ALLOW(socket), + /* Allow us to create sockets and call connect. */ KORE_SYSCALL_ALLOW(connect), - KORE_SYSCALL_ALLOW(sendto), - KORE_SYSCALL_ALLOW(recvfrom), + KORE_SYSCALL_ALLOW_ARG(socket, 0, AF_INET), + KORE_SYSCALL_ALLOW_ARG(socket, 0, AF_INET6), + KORE_SYSCALL_ALLOW_ARG(socket, 0, AF_UNIX), + + /* Requires these calls. 
*/ KORE_SYSCALL_ALLOW(getsockopt), KORE_SYSCALL_ALLOW(getsockname), }; diff --git a/src/seccomp.c b/src/seccomp.c @@ -15,6 +15,7 @@ */ #include <sys/param.h> +#include <sys/mman.h> #include <sys/epoll.h> #include <sys/prctl.h> #include <sys/syscall.h> @@ -42,7 +43,7 @@ */ static struct sock_filter filter_kore[] = { /* Deny these, but with EACCESS instead of dying. */ - KORE_SYSCALL_DENY_ERRNO(ioctl, EACCES), + KORE_SYSCALL_DENY(ioctl, EACCES), /* File related. */ KORE_SYSCALL_ALLOW(open), @@ -54,10 +55,10 @@ static struct sock_filter filter_kore[] = { KORE_SYSCALL_ALLOW(fcntl), KORE_SYSCALL_ALLOW(lseek), KORE_SYSCALL_ALLOW(close), + KORE_SYSCALL_ALLOW(openat), KORE_SYSCALL_ALLOW(access), KORE_SYSCALL_ALLOW(writev), KORE_SYSCALL_ALLOW(getcwd), - KORE_SYSCALL_ALLOW(openat), KORE_SYSCALL_ALLOW(unlink), /* Process related. */ @@ -70,8 +71,13 @@ static struct sock_filter filter_kore[] = { /* Memory related. */ KORE_SYSCALL_ALLOW(brk), - KORE_SYSCALL_ALLOW(mmap), KORE_SYSCALL_ALLOW(munmap), + + /* Deny mmap/mprotect calls with PROT_EXEC/PROT_WRITE protection. */ + KORE_SYSCALL_DENY_WITH_FLAG(mmap, 2, PROT_EXEC | PROT_WRITE, EINVAL), + KORE_SYSCALL_DENY_WITH_FLAG(mprotect, 2, PROT_EXEC, EINVAL), + + KORE_SYSCALL_ALLOW(mmap), KORE_SYSCALL_ALLOW(mprotect), /* Net related. */ @@ -103,14 +109,14 @@ static struct sock_filter filter_kore[] = { /* bpf program prologue. */ static struct sock_filter filter_prologue[] = { /* Load arch member into accumulator (A) (arch is __u32). */ - BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, arch)), + KORE_BPF_LOAD(arch, 0), /* Compare accumulator against constant, if false jump over kill. */ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + KORE_BPF_CMP(SECCOMP_AUDIT_ARCH, 1, 0), + KORE_BPF_RET(SECCOMP_RET_KILL), - /* Load system call member into accumulator (nr is int). 
*/ - BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)), + /* Load the system call number into the accumulator. */ + KORE_BPF_LOAD(nr, 0), }; /* bpf program epilogue. */