commit 95bacb56903b0128fda5326bbb7e515830c80467
parent 7df5339c8d97cd05941915eaa74c907c78b40bc2
Author: Joris Vink <joris@coders.se>
Date: Sun, 7 Jul 2013 14:48:32 +0200
Kore will now keep track of page handlers that cause workers to die.
This is useful for tracking down issues in your module.
The log entries for a page handler that caused a worker to die look like:
Jul 7 14:44:30 devbook kore[18191]: [parent]: worker 1 (18193)-> status 11
Jul 7 14:44:30 devbook kore[18191]: [parent]: worker 1 (pid: 18193) (hdlr: 0x242d9c0) gone
Jul 7 14:44:30 devbook kore[18191]: [parent]: hdlr serve_intro has caused 2 error(s)
Diffstat:
4 files changed, 36 insertions(+), 21 deletions(-)
diff --git a/includes/kore.h b/includes/kore.h
@@ -131,6 +131,7 @@ struct kore_module_handle {
char *func;
void *addr;
int type;
+ int errors;
regex_t rctx;
TAILQ_ENTRY(kore_module_handle) list;
@@ -144,6 +145,7 @@ struct kore_worker {
u_int8_t has_lock;
u_int16_t accepted;
u_int16_t accept_treshold;
+ struct kore_module_handle *active_hdlr;
};
struct kore_domain {
@@ -256,10 +258,10 @@ void kore_module_load(char *);
void kore_module_reload(void);
int kore_module_loaded(void);
void kore_domain_closelogs(void);
-void *kore_module_handler_find(char *, char *);
void kore_domain_sslstart(struct kore_domain *);
int kore_module_handler_new(char *, char *, char *, int);
-struct kore_domain *kore_domain_lookup(const char *);
+struct kore_domain *kore_domain_lookup(const char *);
+struct kore_module_handle *kore_module_handler_find(char *, char *);
void fatal(const char *, ...);
void kore_debug_internal(char *, int, const char *, ...);
diff --git a/src/http.c b/src/http.c
@@ -92,8 +92,9 @@ http_request_new(struct connection *c, struct spdy_stream *s, char *host,
void
http_process(void)
{
- struct http_request *req, *next;
- int r, (*hdlr)(struct http_request *);
+ struct http_request *req, *next;
+ struct kore_module_handle *hdlr;
+ int r, (*cb)(struct http_request *);
for (req = TAILQ_FIRST(&http_requests); req != NULL; req = next) {
next = TAILQ_NEXT(req, list);
@@ -110,10 +111,15 @@ http_process(void)
hdlr = kore_module_handler_find(req->host, req->path);
req->start = kore_time_ms();
- if (hdlr == NULL)
+ if (hdlr == NULL) {
r = http_generic_404(req);
- else
- r = hdlr(req);
+ } else {
+ cb = hdlr->addr;
+
+ worker->active_hdlr = hdlr;
+ r = cb(req);
+ worker->active_hdlr = NULL;
+ }
req->end = kore_time_ms();
switch (r) {
diff --git a/src/module.c b/src/module.c
@@ -71,6 +71,7 @@ kore_module_reload(void)
TAILQ_FOREACH(dom, &domains, list) {
TAILQ_FOREACH(hdlr, &(dom->handlers), list) {
+ hdlr->errors = 0;
hdlr->addr = dlsym(mod_handle, hdlr->func);
if (hdlr->func == NULL)
fatal("no function '%s' found", hdlr->func);
@@ -113,6 +114,7 @@ kore_module_handler_new(char *path, char *domain, char *func, int type)
return (KORE_RESULT_ERROR);
hdlr = (struct kore_module_handle *)kore_malloc(sizeof(*hdlr));
+ hdlr->errors = 0;
hdlr->addr = addr;
hdlr->type = type;
hdlr->path = kore_strdup(path);
@@ -132,7 +134,7 @@ kore_module_handler_new(char *path, char *domain, char *func, int type)
return (KORE_RESULT_OK);
}
-void *
+struct kore_module_handle *
kore_module_handler_find(char *domain, char *path)
{
struct kore_domain *dom;
@@ -144,10 +146,10 @@ kore_module_handler_find(char *domain, char *path)
TAILQ_FOREACH(hdlr, &(dom->handlers), list) {
if (hdlr->type == HANDLER_TYPE_STATIC) {
if (!strcmp(hdlr->path, path))
- return (hdlr->addr);
+ return (hdlr);
} else {
if (!regexec(&(hdlr->rctx), path, 0, NULL, 0))
- return (hdlr->addr);
+ return (hdlr);
}
}
diff --git a/src/worker.c b/src/worker.c
@@ -109,8 +109,10 @@ kore_worker_spawn(u_int16_t id, u_int16_t cpu)
kw->cpu = cpu;
kw->load = 0;
kw->accepted = 0;
- kw->pid = fork();
+ kw->has_lock = 0;
+ kw->active_hdlr = NULL;
+ kw->pid = fork();
if (kw->pid == -1)
fatal("could not spawn worker child: %s", errno_s);
@@ -170,7 +172,6 @@ kore_worker_entry(struct kore_worker *kw)
u_int64_t now, idle_check;
worker = kw;
- kw->has_lock = 0;
if (chroot(chroot_path) == -1)
fatal("cannot chroot(): %s", errno_s);
@@ -325,22 +326,26 @@ kore_worker_wait(int final)
if (WEXITSTATUS(status) || WTERMSIG(status) ||
WCOREDUMP(status)) {
kore_log(LOG_NOTICE,
- "worker %d (pid: %d) gone, respawning new one",
- kw->id, kw->pid);
-
- if (kw->pid == accept_lock->lock) {
- kore_log(LOG_NOTICE,
- "worker %d owned accept lock, releasing",
- kw->id);
+ "worker %d (pid: %d) (hdlr: %p) gone",
+ kw->id, kw->pid, kw->active_hdlr);
+ if (kw->pid == accept_lock->lock)
accept_lock->lock = accept_lock->next;
+
+ if (kw->active_hdlr != NULL) {
+ kw->active_hdlr->errors++;
+ kore_log(LOG_NOTICE,
+ "hdlr %s has caused %d error(s)",
+ kw->active_hdlr->func,
+ kw->active_hdlr->errors);
}
+ kore_log(LOG_NOTICE, "restarting worker %d", kw->id);
kore_worker_spawn(kw->id, kw->cpu);
} else {
kore_log(LOG_NOTICE,
- "worker %d (pid: %d) signaled us",
- kw->id, kw->pid);
+ "worker %d (pid: %d) signaled us (%d)",
+ kw->id, kw->pid, status);
}
break;