/*
 * This file is part of DisOrder.
 * Copyright (C) 2004, 2005, 2007, 2008 Richard Kettlewell
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
/** @file lib/event.c
 * @brief DisOrder event loop
 */

#include "common.h"

#include <unistd.h>
#include <fcntl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <signal.h>
#include <errno.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/un.h>
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include "event.h"
#include "mem.h"
#include "log.h"
#include "syscalls.h"
#include "printf.h"
#include "sink.h"
#include "vector.h"
#include "timeval.h"
#include "heap.h"

/** @brief A timeout */
struct timeout {
  /** @brief Link used by ev_run() to chain the timeouts that have triggered */
  struct timeout *next;
  /** @brief Earliest time at which @ref callback may be called */
  struct timeval when;
  /** @brief Function to call when the timeout triggers */
  ev_timeout_callback *callback;
  /** @brief Passed to @ref callback */
  void *u;
  /** @brief Nonzero until the timeout is cancelled by ev_timeout_cancel() */
  int active;
};

/** @brief Comparison function for timeouts
 * @return Nonzero if @p a is due strictly before @p b
 */
static int timeout_lt(const struct timeout *a,
                      const struct timeout *b) {
  return tvlt(&a->when, &b->when);
}

HEAP_TYPE(timeout_heap, struct timeout *, timeout_lt);
HEAP_DEFINE(timeout_heap, struct timeout *, timeout_lt);

/** @brief A file descriptor in one mode */
struct fd {
  /** @brief The file descriptor */
  int fd;
  /** @brief Called by ev_run() when @ref fd is tripped in this mode */
  ev_fd_callback *callback;
  /** @brief Passed to @ref callback */
  void *u;
  /** @brief Text description, used in diagnostics */
  const char *what;
};

/** @brief All the file descriptors in a given mode */
struct fdmode {
  /** @brief Mask of active file descriptors passed to @c select() */
  fd_set enabled;

  /** @brief File descriptor mask returned from @c select() */
  fd_set tripped;

  /** @brief Number of file descriptors in @p fds */
  int nfds;

  /** @brief Number of slots in @p fds */
  int fdslots;

  /** @brief Array of all active file descriptors */
  struct fd *fds;

  /** @brief Highest-numbered file descriptor or 0 */
  int maxfd;
};

/** @brief A signal handler */
struct signal {
  /** @brief Disposition that was in force before ev_signal() replaced it */
  struct sigaction oldsa;
  /** @brief Called from ev_run() when the signal has been delivered */
  ev_signal_callback *callback;
  /** @brief Passed to @ref callback */
  void *u;
};

/** @brief A child process */
struct child {
  /** @brief Process ID passed to @c wait4() */
  pid_t pid;
  /** @brief Options passed to @c wait4() (WNOHANG is always added) */
  int options;
  /** @brief Called when @c wait4() reports a status for @ref pid */
  ev_child_callback *callback;
  /** @brief Passed to @ref callback */
  void *u;
};

/** @brief An event loop */
struct ev_source {
  /** @brief File descriptors, per mode */
  struct fdmode mode[ev_nmodes];

  /** @brief Heap of timeouts */
  struct timeout_heap timeouts[1];

  /** @brief Array of handled signals */
  struct signal signals[NSIG];

  /** @brief Mask of handled signals */
  sigset_t sigmask;

  /** @brief Escape early from handling of @c select() results
   *
   * This is set if any of the file descriptor arrays are invalidated, since
   * it's then not safe for processing of them to continue.
   */
  int escape;

  /** @brief Signal handling pipe
   *
   * The signal handler writes signal numbers down this pipe.
   */
  int sigpipe[2];

  /** @brief Number of child processes in @p children */
  int nchildren;

  /** @brief Number of slots in @p children */
  int nchildslots;

  /** @brief Array of child processes */
  struct child *children;
};

/** @brief Names of file descriptor modes */
static const char *modenames[] = { "read", "write", "except" };

/* utilities ******************************************************************/

/* creation *******************************************************************/

/** @brief Create a new event loop
 * @return New event loop with no descriptors, timeouts, signals or children
 */
ev_source *ev_new(void) {
  ev_source *ev = xmalloc(sizeof *ev);
  int n;

  memset(ev, 0, sizeof *ev);
  for(n = 0; n < ev_nmodes; ++n)
    FD_ZERO(&ev->mode[n].enabled);
  /* -1 marks the signal pipe as not yet created; see ev_signal() */
  ev->sigpipe[0] = ev->sigpipe[1] = -1;
  sigemptyset(&ev->sigmask);
  timeout_heap_init(ev->timeouts);
  return ev;
}

/* event loop *****************************************************************/

/** @brief Run the event loop
 * @param ev Event loop
 * @return -1 on error, non-0 if any callback returned non-0
 *
 * Each iteration runs the timeouts that are due, waits in @c select() (with
 * the handled signals unblocked) until a descriptor trips or the next timeout
 * is due, and then runs the callbacks of the tripped descriptors.
 */
int ev_run(ev_source *ev) {
  for(;;) {
    struct timeval now;
    struct timeval delta;
    int n, mode;
    int ret;
    int maxfd;
    int select_errno;
    struct timeout *timeouts, *t, **tt;
    struct stat sb;

    xgettimeofday(&now, 0);
    /* Handle timeouts.  We don't want to handle any timeouts that are added
     * while we're handling them (otherwise we'd have to break out of infinite
     * loops, preferably without starving better-behaved subsystems).  Hence
     * the slightly complicated two-phase approach here. */
    /* First we read those timeouts that have triggered out of the heap.  We
     * keep them in the same order they came out of the heap in. */
    tt = &timeouts;
    while(timeout_heap_count(ev->timeouts)
          && tvle(&timeout_heap_first(ev->timeouts)->when, &now)) {
      /* This timeout has reached its trigger time; provided it has not been
       * cancelled we add it to the timeouts list. */
      t = timeout_heap_remove(ev->timeouts);
      if(t->active) {
        *tt = t;
        tt = &t->next;
      }
    }
    *tt = 0;
    /* Now we can run the callbacks for those timeouts.  They might add further
     * timeouts that are already in the past but they won't trigger until the
     * next time round the event loop. */
    for(t = timeouts; t; t = t->next) {
      /* A callback run earlier in this batch may have cancelled this timeout
       * after it was taken off the heap; honour the cancellation. */
      if(!t->active)
        continue;
      D(("calling timeout for %ld.%ld callback %p %p",
         (long)t->when.tv_sec, (long)t->when.tv_usec,
         (void *)t->callback, t->u));
      ret = t->callback(ev, &now, t->u);
      if(ret)
        return ret;
    }
    /* select() is told about every enabled descriptor in every mode */
    maxfd = 0;
    for(mode = 0; mode < ev_nmodes; ++mode) {
      ev->mode[mode].tripped = ev->mode[mode].enabled;
      if(ev->mode[mode].maxfd > maxfd)
        maxfd = ev->mode[mode].maxfd;
    }
    /* Handled signals are only deliverable while we wait in select() */
    xsigprocmask(SIG_UNBLOCK, &ev->sigmask, 0);
    do {
      if(timeout_heap_count(ev->timeouts)) {
        /* Wait no longer than the time remaining to the earliest timeout */
        t = timeout_heap_first(ev->timeouts);
        xgettimeofday(&now, 0);
        delta.tv_sec = t->when.tv_sec - now.tv_sec;
        delta.tv_usec = t->when.tv_usec - now.tv_usec;
        if(delta.tv_usec < 0) {
          delta.tv_usec += 1000000;
          --delta.tv_sec;
        }
        /* Already overdue: poll without blocking */
        if(delta.tv_sec < 0)
          delta.tv_sec = delta.tv_usec = 0;
        n = select(maxfd + 1,
                   &ev->mode[ev_read].tripped,
                   &ev->mode[ev_write].tripped,
                   &ev->mode[ev_except].tripped,
                   &delta);
      } else {
        n = select(maxfd + 1,
                   &ev->mode[ev_read].tripped,
                   &ev->mode[ev_write].tripped,
                   &ev->mode[ev_except].tripped,
                   0);
      }
    } while(n < 0 && errno == EINTR);
    /* Latch the select() error code now; the calls below may change errno */
    select_errno = errno;
    xsigprocmask(SIG_BLOCK, &ev->sigmask, 0);
    if(n < 0) {
      error(select_errno, "error calling select");
      if(select_errno == EBADF) {
        /* If there's a bad FD in the mix then check them all and log what we
         * find, to ease debugging */
        for(mode = 0; mode < ev_nmodes; ++mode) {
          for(n = 0; n < ev->mode[mode].nfds; ++n) {
            const int fd = ev->mode[mode].fds[n].fd;

            if(FD_ISSET(fd, &ev->mode[mode].enabled)
               && fstat(fd, &sb) < 0)
              error(errno, "mode %s fstat %d (%s)",
                    modenames[mode], fd, ev->mode[mode].fds[n].what);
          }
          for(n = 0; n <= maxfd; ++n)
            if(FD_ISSET(n, &ev->mode[mode].enabled)
               && fstat(n, &sb) < 0)
              error(errno, "mode %s fstat %d", modenames[mode], n);
        }
      }
      return -1;
    }
    if(n > 0) {
      /* if anything deranges the meaning of an fd, or re-orders the
       * fds[] tables, we'd better give up; such operations will
       * therefore set @escape@. */
      ev->escape = 0;
      for(mode = 0; mode < ev_nmodes && !ev->escape; ++mode)
        for(n = 0; n < ev->mode[mode].nfds && !ev->escape; ++n) {
          int fd = ev->mode[mode].fds[n].fd;

          if(FD_ISSET(fd, &ev->mode[mode].tripped)) {
            D(("calling %s fd %d callback %p %p", modenames[mode], fd,
               (void *)ev->mode[mode].fds[n].callback,
               ev->mode[mode].fds[n].u));
            ret = ev->mode[mode].fds[n].callback(ev, fd,
                                                 ev->mode[mode].fds[n].u);
            if(ret)
              return ret;
          }
        }
    }
    /* we'll pick up timeouts back round the loop */
  }
}

/* file descriptors ***********************************************************/

/** @brief Register a file descriptor
 * @param ev Event loop
 * @param mode @c ev_read or @c ev_write
 * @param fd File descriptor
 * @param callback Called when @p fd is readable/writable
 * @param u Passed to @p callback
 * @param what Text description
 * @return 0 on success, non-0 on error
 *
 * Sets @ref ev_source::escape, so no further processing of file descriptors
 * will occur this time round the event loop.
*/
int ev_fd(ev_source *ev,
          ev_fdmode mode,
          int fd,
          ev_fd_callback *callback,
          void *u,
          const char *what) {
  int n;

  /* Validate the mode before it is used to index modenames[] or ev->mode[] */
  assert(mode < ev_nmodes);
  D(("registering %s fd %d callback %p %p",
     modenames[mode], fd, (void *)callback, u));
  /* An fd_set can only describe descriptors in [0, FD_SETSIZE); passing
   * anything else to FD_SET() is undefined, so refuse it. */
  if(fd < 0 || fd >= FD_SETSIZE)
    return -1;
  /* Grow the table geometrically, starting at 16 slots */
  if(ev->mode[mode].nfds >= ev->mode[mode].fdslots) {
    ev->mode[mode].fdslots = (ev->mode[mode].fdslots
                               ? 2 * ev->mode[mode].fdslots : 16);
    D(("expanding %s fd table to %d entries", modenames[mode],
       ev->mode[mode].fdslots));
    ev->mode[mode].fds = xrealloc(ev->mode[mode].fds,
                                  ev->mode[mode].fdslots * sizeof (struct fd));
  }
  n = ev->mode[mode].nfds++;
  /* Newly registered descriptors start out enabled */
  FD_SET(fd, &ev->mode[mode].enabled);
  ev->mode[mode].fds[n].fd = fd;
  ev->mode[mode].fds[n].callback = callback;
  ev->mode[mode].fds[n].u = u;
  ev->mode[mode].fds[n].what = what;
  if(fd > ev->mode[mode].maxfd)
    ev->mode[mode].maxfd = fd;
  /* The table may have moved, so ev_run() must stop walking it */
  ev->escape = 1;
  return 0;
}

/** @brief Cancel a file descriptor
 * @param ev Event loop
 * @param mode @c ev_read or @c ev_write
 * @param fd File descriptor
 * @return 0 on success, non-0 on error
 *
 * Sets @ref ev_source::escape, so no further processing of file descriptors
 * will occur this time round the event loop.
*/
int ev_fd_cancel(ev_source *ev, ev_fdmode mode, int fd) {
  int n;
  int maxfd;

  D(("cancelling mode %s fd %d", modenames[mode], fd));
  /* find the right struct fd */
  for(n = 0; n < ev->mode[mode].nfds && fd != ev->mode[mode].fds[n].fd; ++n)
    ;
  assert(n < ev->mode[mode].nfds);
  /* If assertions are compiled out, report the unregistered fd as an error
   * rather than copying over a slot past the end and driving nfds negative. */
  if(n >= ev->mode[mode].nfds)
    return -1;
  /* swap in the last fd and reduce the count */
  if(n != ev->mode[mode].nfds - 1)
    ev->mode[mode].fds[n] = ev->mode[mode].fds[ev->mode[mode].nfds - 1];
  --ev->mode[mode].nfds;
  /* if that was the biggest fd, find the new biggest one */
  if(fd == ev->mode[mode].maxfd) {
    maxfd = 0;
    for(n = 0; n < ev->mode[mode].nfds; ++n)
      if(ev->mode[mode].fds[n].fd > maxfd)
        maxfd = ev->mode[mode].fds[n].fd;
    ev->mode[mode].maxfd = maxfd;
  }
  /* don't tell select about this fd any more */
  FD_CLR(fd, &ev->mode[mode].enabled);
  /* The table has been re-ordered, so ev_run() must stop walking it */
  ev->escape = 1;
  return 0;
}

/** @brief Re-enable a file descriptor
 * @param ev Event loop
 * @param mode @c ev_read or @c ev_write
 * @param fd File descriptor
 * @return 0 on success, non-0 on error
 *
 * It is harmless if @p fd is currently disabled, but it must not have been
 * cancelled.
 */
int ev_fd_enable(ev_source *ev, ev_fdmode mode, int fd) {
  /* FD_SET() is only defined for descriptors in [0, FD_SETSIZE) */
  assert(fd >= 0);
  assert(fd < FD_SETSIZE);
  D(("enabling mode %s fd %d", modenames[mode], fd));
  FD_SET(fd, &ev->mode[mode].enabled);
  return 0;
}

/** @brief Temporarily disable a file descriptor
 * @param ev Event loop
 * @param mode @c ev_read or @c ev_write
 * @param fd File descriptor
 * @return 0 on success, non-0 on error
 *
 * Re-enable with ev_fd_enable().  It is harmless if @p fd is already disabled,
 * but it must not have been cancelled.
*/
int ev_fd_disable(ev_source *ev, ev_fdmode mode, int fd) {
  D(("disabling mode %s fd %d", modenames[mode], fd));
  /* FD_CLR() is only defined for descriptors in [0, FD_SETSIZE).  A writer
   * that has already shut down holds fd -1, so report that as an error
   * instead of touching memory outside the sets. */
  if(fd < 0 || fd >= FD_SETSIZE)
    return -1;
  FD_CLR(fd, &ev->mode[mode].enabled);
  FD_CLR(fd, &ev->mode[mode].tripped);
  /* Suppress any pending callbacks */
  ev->escape = 1;
  return 0;
}

/** @brief Log a report of file descriptor state
 * @param ev Event loop
 *
 * Does nothing unless @c debugging is set.  For each mode it logs the highest
 * descriptor, then every registered descriptor with its enabled/tripped
 * state, then a one-line summary of all enabled descriptors.
 */
void ev_report(ev_source *ev) {
  int n, fd;
  ev_fdmode mode;
  struct dynstr d[1];
  char b[4096];

  if(!debugging)
    return;
  dynstr_init(d);
  for(mode = 0; mode < ev_nmodes; ++mode) {
    D(("mode %s maxfd %d", modenames[mode], ev->mode[mode].maxfd));
    /* One line per registered descriptor */
    for(n = 0; n < ev->mode[mode].nfds; ++n) {
      fd = ev->mode[mode].fds[n].fd;
      D(("fd %s %d%s%s (%s)",
         modenames[mode],
         fd,
         FD_ISSET(fd, &ev->mode[mode].enabled) ? " enabled" : "",
         FD_ISSET(fd, &ev->mode[mode].tripped) ? " tripped" : "",
         ev->mode[mode].fds[n].what));
    }
    /* Reuse the same string for each mode's summary */
    d->nvec = 0;
    for(fd = 0; fd <= ev->mode[mode].maxfd; ++fd) {
      if(!FD_ISSET(fd, &ev->mode[mode].enabled))
        continue;
      /* Look for a registration so that the description can be shown; an
       * enabled bit without one is reported by number alone */
      for(n = 0; n < ev->mode[mode].nfds; ++n) {
        if(ev->mode[mode].fds[n].fd == fd)
          break;
      }
      if(n < ev->mode[mode].nfds)
        snprintf(b, sizeof b, "%d(%s)", fd, ev->mode[mode].fds[n].what);
      else
        snprintf(b, sizeof b, "%d", fd);
      dynstr_append(d, ' ');
      dynstr_append_string(d, b);
    }
    dynstr_terminate(d);
    D(("%s enabled:%s", modenames[mode], d->vec));
  }
}

/* timeouts *******************************************************************/

/** @brief Register a timeout
 * @param ev Event source
 * @param handlep Where to store timeout handle, or @c NULL
 * @param when Earliest time to call @p callback, or @c NULL
 * @param callback Function to call at or after @p when
 * @param u Passed to @p callback
 * @return 0 on success, non-0 on error
 *
 * If @p when is a null pointer then a time of 0 is assumed.  The effect is to
 * call the timeout handler from ev_run() next time around the event loop.
* This is used internally to schedule various operations if it is not * convenient to call them from the current place in the call stack, or * externally to ensure that other clients of the event loop get a look in when * performing some lengthy operation. */ int ev_timeout(ev_source *ev, ev_timeout_handle *handlep, const struct timeval *when, ev_timeout_callback *callback, void *u) { struct timeout *t; D(("registering timeout at %ld.%ld callback %p %p", when ? (long)when->tv_sec : 0, when ? (long)when->tv_usec : 0, (void *)callback, u)); t = xmalloc(sizeof *t); if(when) t->when = *when; t->callback = callback; t->u = u; t->active = 1; timeout_heap_insert(ev->timeouts, t); if(handlep) *handlep = t; return 0; } /** @brief Cancel a timeout * @param ev Event loop * @param handle Handle returned from ev_timeout(), or 0 * @return 0 on success, non-0 on error * * If @p handle is 0 then this is a no-op. */ int ev_timeout_cancel(ev_source attribute((unused)) *ev, ev_timeout_handle handle) { struct timeout *t = handle; if(t) t->active = 0; return 0; } /* signals ********************************************************************/ /** @brief Mapping of signals to pipe write ends * * The pipes are per-event loop, it's possible in theory for there to be * multiple event loops (e.g. in different threads), although in fact DisOrder * does not do this. */ static int sigfd[NSIG]; /** @brief The signal handler * @param s Signal number * * Writes to @c sigfd[s]. 
*/ static void sighandler(int s) { unsigned char sc = s; static const char errmsg[] = "error writing to signal pipe"; /* probably the reader has stopped listening for some reason */ if(write(sigfd[s], &sc, 1) < 0) { /* do the best we can as we're about to abort; shut _up_, gcc */ int _ignore = write(2, errmsg, sizeof errmsg - 1); (void)_ignore; abort(); } } /** @brief Read callback for signals */ static int signal_read(ev_source *ev, int attribute((unused)) fd, void attribute((unused)) *u) { unsigned char s; int n; int ret; if((n = read(ev->sigpipe[0], &s, 1)) == 1) if((ret = ev->signals[s].callback(ev, s, ev->signals[s].u))) return ret; assert(n != 0); if(n < 0 && (errno != EINTR && errno != EAGAIN)) { error(errno, "error reading from signal pipe %d", ev->sigpipe[0]); return -1; } return 0; } /** @brief Close the signal pipe */ static void close_sigpipe(ev_source *ev) { int save_errno = errno; xclose(ev->sigpipe[0]); xclose(ev->sigpipe[1]); ev->sigpipe[0] = ev->sigpipe[1] = -1; errno = save_errno; } /** @brief Register a signal handler * @param ev Event loop * @param sig Signal to handle * @param callback Called when signal is delivered * @param u Passed to @p callback * @return 0 on success, non-0 on error * * Note that @p callback is called from inside ev_run(), not from inside the * signal handler, so the usual restrictions on signal handlers do not apply. 
*/
int ev_signal(ev_source *ev,
              int sig,
              ev_signal_callback *callback,
              void *u) {
  int n;
  int registered;
  struct sigaction sa;

  D(("registering signal %d handler callback %p %p", sig, (void *)callback, u));
  assert(sig > 0);
  assert(sig < NSIG);
  /* The signal number travels down the pipe as a single byte */
  assert(sig <= UCHAR_MAX);
  if(ev->sigpipe[0] == -1) {
    /* First handled signal for this event loop: create the pipe and watch
     * its read end */
    D(("creating signal pipe"));
    xpipe(ev->sigpipe);
    D(("signal pipe is %d, %d", ev->sigpipe[0], ev->sigpipe[1]));
    for(n = 0; n < 2; ++n) {
      nonblock(ev->sigpipe[n]);
      cloexec(ev->sigpipe[n]);
    }
    if(ev_fd(ev, ev_read, ev->sigpipe[0], signal_read, 0, "sigpipe read")) {
      close_sigpipe(ev);
      return -1;
    }
  }
  /* If this signal is already handled then oldsa holds the disposition from
   * before the first registration and must not be overwritten with our own
   * handler, or ev_signal_cancel() could never restore the original. */
  registered = (ev->signals[sig].callback != 0);
  /* The signal stays blocked except while ev_run() is inside select() */
  sigaddset(&ev->sigmask, sig);
  xsigprocmask(SIG_BLOCK, &ev->sigmask, 0);
  sigfd[sig] = ev->sigpipe[1];
  ev->signals[sig].callback = callback;
  ev->signals[sig].u = u;
  /* Don't hand the kernel indeterminate values in fields we don't set */
  memset(&sa, 0, sizeof sa);
  sa.sa_handler = sighandler;
  sigfillset(&sa.sa_mask);
  sa.sa_flags = SA_RESTART;
  xsigaction(sig, &sa, registered ? 0 : &ev->signals[sig].oldsa);
  ev->escape = 1;
  return 0;
}

/** @brief Cancel a signal handler
 * @param ev Event loop
 * @param sig Signal to cancel
 * @return 0 on success, non-0 on error
 *
 * Restores the saved disposition for @p sig and unblocks it.
 */
int ev_signal_cancel(ev_source *ev,
                     int sig) {
  sigset_t ss;

  /* Same range as ev_signal() accepts; anything else would index outside
   * signals[] */
  assert(sig > 0);
  assert(sig < NSIG);
  xsigaction(sig, &ev->signals[sig].oldsa, 0);
  ev->signals[sig].callback = 0;
  ev->escape = 1;
  sigdelset(&ev->sigmask, sig);
  sigemptyset(&ss);
  sigaddset(&ss, sig);
  xsigprocmask(SIG_UNBLOCK, &ss, 0);
  return 0;
}

/** @brief Clean up signal handling
 * @param ev Event loop
 *
 * This function can be called from inside a fork.  It restores signal
 * handlers, unblocks the signals, and closes the signal pipe for @p ev.
*/ void ev_signal_atfork(ev_source *ev) { int sig; if(ev->sigpipe[0] != -1) { /* revert any handled signals to their original state */ for(sig = 1; sig < NSIG; ++sig) { if(ev->signals[sig].callback != 0) xsigaction(sig, &ev->signals[sig].oldsa, 0); } /* and then unblock them */ xsigprocmask(SIG_UNBLOCK, &ev->sigmask, 0); /* don't want a copy of the signal pipe open inside the fork */ xclose(ev->sigpipe[0]); xclose(ev->sigpipe[1]); } } /* child processes ************************************************************/ /** @brief Called on SIGCHLD */ static int sigchld_callback(ev_source *ev, int attribute((unused)) sig, void attribute((unused)) *u) { struct rusage ru; pid_t r; int status, n, ret, revisit; do { revisit = 0; for(n = 0; n < ev->nchildren; ++n) { r = wait4(ev->children[n].pid, &status, ev->children[n].options | WNOHANG, &ru); if(r > 0) { ev_child_callback *c = ev->children[n].callback; void *cu = ev->children[n].u; if(WIFEXITED(status) || WIFSIGNALED(status)) ev_child_cancel(ev, r); revisit = 1; if((ret = c(ev, r, status, &ru, cu))) return ret; } else if(r < 0) { /* We should "never" get an ECHILD but it can in fact happen. For * instance on Linux 2.4.31, and probably other versions, if someone * straces a child process and then a different child process * terminates, when we wait4() the trace process we will get ECHILD * because it has been reparented to strace. Obviously this is a * hopeless design flaw in the tracing infrastructure, but we don't * want the disorder server to bomb out because of it. So we just log * the problem and ignore it. */ error(errno, "error calling wait4 for PID %lu (broken ptrace?)", (unsigned long)ev->children[n].pid); if(errno != ECHILD) return -1; } } } while(revisit); return 0; } /** @brief Configure event loop for child process handling * @return 0 on success, non-0 on error * * Currently at most one event loop can handle child processes and it must be * distinguished from others by calling this function on it. 
This could be * fixed but since no process ever makes use of more than one event loop there * is no need. */ int ev_child_setup(ev_source *ev) { D(("installing SIGCHLD handler")); return ev_signal(ev, SIGCHLD, sigchld_callback, 0); } /** @brief Wait for a child process to terminate * @param ev Event loop * @param pid Process ID of child * @param options Options to pass to @c wait4() * @param callback Called when child terminates (or possibly when it stops) * @param u Passed to @p callback * @return 0 on success, non-0 on error * * You must have called ev_child_setup() on @p ev once first. */ int ev_child(ev_source *ev, pid_t pid, int options, ev_child_callback *callback, void *u) { int n; D(("registering child handling %ld options %d callback %p %p", (long)pid, options, (void *)callback, u)); assert(ev->signals[SIGCHLD].callback == sigchld_callback); if(ev->nchildren >= ev->nchildslots) { ev->nchildslots = ev->nchildslots ? 2 * ev->nchildslots : 16; ev->children = xrealloc(ev->children, ev->nchildslots * sizeof (struct child)); } n = ev->nchildren++; ev->children[n].pid = pid; ev->children[n].options = options; ev->children[n].callback = callback; ev->children[n].u = u; return 0; } /** @brief Stop waiting for a child process * @param ev Event loop * @param pid Child process ID * @return 0 on success, non-0 on error */ int ev_child_cancel(ev_source *ev, pid_t pid) { int n; for(n = 0; n < ev->nchildren && ev->children[n].pid != pid; ++n) ; assert(n < ev->nchildren); if(n != ev->nchildren - 1) ev->children[n] = ev->children[ev->nchildren - 1]; --ev->nchildren; return 0; } /* socket listeners ***********************************************************/ /** @brief State for a socket listener */ struct listen_state { ev_listen_callback *callback; void *u; }; /** @brief Called when a listenign socket is readable */ static int listen_callback(ev_source *ev, int fd, void *u) { const struct listen_state *l = u; int newfd; union { struct sockaddr_in in; #if 
HAVE_STRUCT_SOCKADDR_IN6 struct sockaddr_in6 in6; #endif struct sockaddr_un un; struct sockaddr sa; } addr; socklen_t addrlen; int ret; D(("callback for listener fd %d", fd)); while((addrlen = sizeof addr), (newfd = accept(fd, &addr.sa, &addrlen)) >= 0) { if((ret = l->callback(ev, newfd, &addr.sa, addrlen, l->u))) return ret; } switch(errno) { case EINTR: case EAGAIN: break; #ifdef ECONNABORTED case ECONNABORTED: error(errno, "error calling accept"); break; #endif #ifdef EPROTO case EPROTO: /* XXX on some systems EPROTO should be fatal, but we don't know if * we're running on one of them */ error(errno, "error calling accept"); break; #endif default: fatal(errno, "error calling accept"); break; } if(errno != EINTR && errno != EAGAIN) error(errno, "error calling accept"); return 0; } /** @brief Listen on a socket for inbound stream connections * @param ev Event source * @param fd File descriptor of socket * @param callback Called when a new connection arrives * @param u Passed to @p callback * @param what Text description of socket * @return 0 on success, non-0 on error */ int ev_listen(ev_source *ev, int fd, ev_listen_callback *callback, void *u, const char *what) { struct listen_state *l = xmalloc(sizeof *l); D(("registering listener fd %d callback %p %p", fd, (void *)callback, u)); l->callback = callback; l->u = u; return ev_fd(ev, ev_read, fd, listen_callback, l, what); } /** @brief Stop listening on a socket * @param ev Event loop * @param fd File descriptor of socket * @return 0 on success, non-0 on error */ int ev_listen_cancel(ev_source *ev, int fd) { D(("cancelling listener fd %d", fd)); return ev_fd_cancel(ev, ev_read, fd); } /* buffer *********************************************************************/ /** @brief Buffer structure */ struct buffer { char *base, *start, *end, *top; }; /* @brief Make sure there is @p bytes available at @c b->end */ static void buffer_space(struct buffer *b, size_t bytes) { D(("buffer_space %p %p %p %p want %lu", (void 
*)b->base, (void *)b->start, (void *)b->end, (void *)b->top, (unsigned long)bytes)); if(b->start == b->end) b->start = b->end = b->base; if((size_t)(b->top - b->end) < bytes) { if((size_t)((b->top - b->end) + (b->start - b->base)) < bytes) { size_t newspace = b->end - b->start + bytes, n; char *newbase; for(n = 16; n < newspace; n *= 2) ; newbase = xmalloc_noptr(n); memcpy(newbase, b->start, b->end - b->start); b->base = newbase; b->end = newbase + (b->end - b->start); b->top = newbase + n; b->start = newbase; /* must be last */ } else { memmove(b->base, b->start, b->end - b->start); b->end = b->base + (b->end - b->start); b->start = b->base; } } D(("result %p %p %p %p", (void *)b->base, (void *)b->start, (void *)b->end, (void *)b->top)); } /* readers and writers *******************************************************/ /** @brief State structure for a buffered writer */ struct ev_writer { /** @brief Sink used for writing to the buffer */ struct sink s; /** @brief Output buffer */ struct buffer b; /** @brief File descriptor to write to */ int fd; /** @brief Set if there'll be no more output */ int eof; /** @brief Error/termination callback */ ev_error_callback *callback; /** @brief Passed to @p callback */ void *u; /** @brief Parent event source */ ev_source *ev; /** @brief Maximum amount of time between succesful writes, 0 = don't care */ int timebound; /** @brief Maximum amount of data to buffer, 0 = don't care */ int spacebound; /** @brief Error code to pass to @p callback (see writer_shutdown()) */ int error; /** @brief Timeout handle for @p timebound (or 0) */ ev_timeout_handle timeout; /** @brief Description of this writer */ const char *what; /** @brief Tied reader or 0 */ ev_reader *reader; /** @brief Set when abandoned */ int abandoned; }; /** @brief State structure for a buffered reader */ struct ev_reader { /** @brief Input buffer */ struct buffer b; /** @brief File descriptor read from */ int fd; /** @brief Called when new data is available */ 
ev_reader_callback *callback; /** @brief Called on error and shutdown */ ev_error_callback *error_callback; /** @brief Passed to @p callback and @p error_callback */ void *u; /** @brief Parent event loop */ ev_source *ev; /** @brief Set when EOF is detected */ int eof; /** @brief Error code to pass to error callback */ int error; /** @brief Tied writer or NULL */ ev_writer *writer; }; /* buffered writer ************************************************************/ /** @brief Shut down the writer * * This is called to shut down a writer. The error callback is not called * through any other path. Also we do not cancel @p fd from anywhere else, * though we might disable it. * * It has the signature of a timeout callback so that it can be called from a * time=0 timeout. * * Calls @p callback with @p w->syntherr as the error code (which might be 0). */ static int writer_shutdown(ev_source *ev, const attribute((unused)) struct timeval *now, void *u) { ev_writer *w = u; if(w->fd == -1) return 0; /* already shut down */ D(("writer_shutdown fd=%d error=%d", w->fd, w->error)); ev_timeout_cancel(ev, w->timeout); ev_fd_cancel(ev, ev_write, w->fd); w->timeout = 0; if(w->reader) { D(("found a tied reader")); /* If there is a reader still around we just untie it */ w->reader->writer = 0; shutdown(w->fd, SHUT_WR); /* there'll be no more writes */ } else { D(("no tied reader")); /* There's no reader so we are free to close the FD */ xclose(w->fd); } w->fd = -1; return w->callback(ev, w->error, w->u); } /** @brief Called when a writer's @p timebound expires */ static int writer_timebound_exceeded(ev_source *ev, const struct timeval *now, void *u) { ev_writer *const w = u; if(!w->abandoned) { w->abandoned = 1; error(0, "abandoning writer '%s' because no writes within %ds", w->what, w->timebound); w->error = ETIMEDOUT; } return writer_shutdown(ev, now, u); } /** @brief Set the time bound callback (if not set already) */ static void writer_set_timebound(ev_writer *w) { if(w->timebound 
&& !w->timeout) { struct timeval when; ev_source *const ev = w->ev; xgettimeofday(&when, 0); when.tv_sec += w->timebound; ev_timeout(ev, &w->timeout, &when, writer_timebound_exceeded, w); } } /** @brief Called when a writer's file descriptor is writable */ static int writer_callback(ev_source *ev, int fd, void *u) { ev_writer *const w = u; int n; n = write(fd, w->b.start, w->b.end - w->b.start); D(("callback for writer fd %d, %ld bytes, n=%d, errno=%d", fd, (long)(w->b.end - w->b.start), n, errno)); if(n >= 0) { /* Consume bytes from the buffer */ w->b.start += n; /* Suppress any outstanding timeout */ ev_timeout_cancel(ev, w->timeout); w->timeout = 0; if(w->b.start == w->b.end) { /* The buffer is empty */ if(w->eof) { /* We're done, we can shut down this writer */ w->error = 0; return writer_shutdown(ev, 0, w); } else /* There might be more to come but we don't need writer_callback() to * be called for the time being */ ev_fd_disable(ev, ev_write, fd); } else /* The buffer isn't empty, set a timeout so we give up if we don't manage * to write some more within a reasonable time */ writer_set_timebound(w); } else { switch(errno) { case EINTR: case EAGAIN: break; default: w->error = errno; return writer_shutdown(ev, 0, w); } } return 0; } /** @brief Write bytes to a writer's buffer * * This is the sink write callback. * * Calls ev_fd_enable() if necessary (i.e. if the buffer was empty but * now is not). */ static int ev_writer_write(struct sink *sk, const void *s, int n) { ev_writer *w = (ev_writer *)sk; if(!n) return 0; /* avoid silliness */ if(w->fd == -1) error(0, "ev_writer_write on %s after shutdown", w->what); if(w->spacebound && w->b.end - w->b.start + n > w->spacebound) { /* The new buffer contents will exceed the space bound. We assume that the * remote client has gone away and TCP hasn't noticed yet, or that it's got * hopelessly stuck. 
*/ if(!w->abandoned) { w->abandoned = 1; error(0, "abandoning writer '%s' because buffer has reached %td bytes", w->what, w->b.end - w->b.start); ev_fd_disable(w->ev, ev_write, w->fd); w->error = EPIPE; return ev_timeout(w->ev, 0, 0, writer_shutdown, w); } else return 0; } /* Make sure there is space */ buffer_space(&w->b, n); /* If the buffer was formerly empty then we'll need to re-enable the FD */ if(w->b.start == w->b.end) ev_fd_enable(w->ev, ev_write, w->fd); memcpy(w->b.end, s, n); w->b.end += n; /* Arrange a timeout if there wasn't one set already */ writer_set_timebound(w); return 0; } /** @brief Create a new buffered writer * @param ev Event loop * @param fd File descriptor to write to * @param callback Called if an error occurs and when finished * @param u Passed to @p callback * @param what Text description * @return New writer or @c NULL * * Writers own their file descriptor and close it when they have finished with * it. * * If you pass the same fd to a reader and writer, you must tie them together * with ev_tie(). */ ev_writer *ev_writer_new(ev_source *ev, int fd, ev_error_callback *callback, void *u, const char *what) { ev_writer *w = xmalloc(sizeof *w); D(("registering writer fd %d callback %p %p", fd, (void *)callback, u)); w->s.write = ev_writer_write; w->fd = fd; w->callback = callback; w->u = u; w->ev = ev; w->timebound = 10 * 60; w->spacebound = 512 * 1024; w->what = what; if(ev_fd(ev, ev_write, fd, writer_callback, w, what)) return 0; /* Buffer is initially empty so we don't want a callback */ ev_fd_disable(ev, ev_write, fd); return w; } /** @brief Get/set the time bound * @param w Writer * @param new_time_bound New bound or -1 for no change * @return Latest time bound * * If @p new_time_bound is negative then the current time bound is returned. * Otherwise it is set and the new value returned. * * The time bound is the number of seconds allowed between writes. 
If it takes * longer than this to flush a buffer then the peer will be assumed to be dead * and an error will be synthesized. 0 means "don't care". The default time * bound is 10 minutes. * * Note that this value does not take into account kernel buffering and * timeouts. */ int ev_writer_time_bound(ev_writer *w, int new_time_bound) { if(new_time_bound >= 0) w->timebound = new_time_bound; return w->timebound; } /** @brief Get/set the space bound * @param w Writer * @param new_space_bound New bound or -1 for no change * @return Latest space bound * * If @p new_space_bound is negative then the current space bound is returned. * Otherwise it is set and the new value returned. * * The space bound is the number of bytes allowed between in the buffer. If * the buffer exceeds this size an error will be synthesized. 0 means "don't * care". The default space bound is 512Kbyte. * * Note that this value does not take into account kernel buffering. */ int ev_writer_space_bound(ev_writer *w, int new_space_bound) { if(new_space_bound >= 0) w->spacebound = new_space_bound; return w->spacebound; } /** @brief Return the sink associated with a writer * @param w Writer * @return Pointer to sink * * Writing to the sink will arrange for those bytes to be written to the file * descriptor as and when it is writable. */ struct sink *ev_writer_sink(ev_writer *w) { if(!w) fatal(0, "ev_write_sink called with null writer"); return &w->s; } /** @brief Close a writer * @param w Writer to close * @return 0 on success, non-0 on error * * Close a writer. No more bytes should be written to its sink. * * When the last byte has been written the callback will be called with an * error code of 0. It is guaranteed that this will NOT happen before * ev_writer_close() returns (although the file descriptor for the writer might * be cancelled by the time it returns). 
*/ int ev_writer_close(ev_writer *w) { D(("close writer fd %d", w->fd)); if(w->eof) return 0; /* already closed */ w->eof = 1; if(w->b.start == w->b.end) { /* We're already finished */ w->error = 0; /* no error */ return ev_timeout(w->ev, 0, 0, writer_shutdown, w); } return 0; } /** @brief Attempt to flush a writer * @param w Writer to flush * @return 0 on success, non-0 on error * * Does a speculative write of any buffered data. Does not block if it cannot * be written. */ int ev_writer_flush(ev_writer *w) { return writer_callback(w->ev, w->fd, w); } /* buffered reader ************************************************************/ /** @brief Shut down a reader * * This is the only path through which we cancel and close the file descriptor. * As with the writer case it is given timeout signature to allow it be * deferred to the next iteration of the event loop. * * We only call @p error_callback if @p error is nonzero (unlike the writer * case). */ static int reader_shutdown(ev_source *ev, const attribute((unused)) struct timeval *now, void *u) { ev_reader *const r = u; if(r->fd == -1) return 0; /* already shut down */ D(("reader_shutdown fd=%d", r->fd)); ev_fd_cancel(ev, ev_read, r->fd); r->eof = 1; if(r->writer) { D(("found a tied writer")); /* If there is a writer still around we just untie it */ r->writer->reader = 0; shutdown(r->fd, SHUT_RD); /* there'll be no more reads */ } else { D(("no tied writer found")); /* There's no writer so we are free to close the FD */ xclose(r->fd); } r->fd = -1; if(r->error) return r->error_callback(ev, r->error, r->u); else return 0; } /** @brief Called when a reader's @p fd is readable */ static int reader_callback(ev_source *ev, int fd, void *u) { ev_reader *r = u; int n; buffer_space(&r->b, 1); n = read(fd, r->b.end, r->b.top - r->b.end); D(("read fd %d buffer %d returned %d errno %d", fd, (int)(r->b.top - r->b.end), n, errno)); if(n > 0) { r->b.end += n; return r->callback(ev, r, r->b.start, r->b.end - r->b.start, 0, r->u); 
} else if(n == 0) { /* No more read callbacks needed */ ev_fd_disable(r->ev, ev_read, r->fd); ev_timeout(r->ev, 0, 0, reader_shutdown, r); /* Pass the remaining data and an eof indicator to the user */ return r->callback(ev, r, r->b.start, r->b.end - r->b.start, 1, r->u); } else { switch(errno) { case EINTR: case EAGAIN: break; default: /* Fatal error, kill the reader now */ r->error = errno; return reader_shutdown(ev, 0, r); } } return 0; } /** @brief Create a new buffered reader * @param ev Event loop * @param fd File descriptor to read from * @param callback Called when new data is available * @param error_callback Called if an error occurs * @param u Passed to callbacks * @param what Text description * @return New reader or @c NULL * * Readers own their fd and close it when they are finished with it. * * If you pass the same fd to a reader and writer, you must tie them together * with ev_tie(). */ ev_reader *ev_reader_new(ev_source *ev, int fd, ev_reader_callback *callback, ev_error_callback *error_callback, void *u, const char *what) { ev_reader *r = xmalloc(sizeof *r); D(("registering reader fd %d callback %p %p %p", fd, (void *)callback, (void *)error_callback, u)); r->fd = fd; r->callback = callback; r->error_callback = error_callback; r->u = u; r->ev = ev; if(ev_fd(ev, ev_read, fd, reader_callback, r, what)) return 0; return r; } void ev_reader_buffer(ev_reader *r, size_t nbytes) { buffer_space(&r->b, nbytes - (r->b.end - r->b.start)); } /** @brief Consume @p n bytes from the reader's buffer * @param r Reader * @param n Number of bytes to consume * * Tells the reader than the next @p n bytes have been dealt with and can now * be discarded. */ void ev_reader_consume(ev_reader *r, size_t n) { r->b.start += n; } /** @brief Cancel a reader * @param r Reader * @return 0 on success, non-0 on error * * No further callbacks will be made, and the FD will be closed (in a later * iteration of the event loop). 
*/ int ev_reader_cancel(ev_reader *r) { D(("cancel reader fd %d", r->fd)); if(r->fd == -1) return 0; /* already thoroughly cancelled */ ev_fd_disable(r->ev, ev_read, r->fd); return ev_timeout(r->ev, 0, 0, reader_shutdown, r); } /** @brief Temporarily disable a reader * @param r Reader * @return 0 on success, non-0 on error * * No further callbacks for this reader will be made. Re-enable with * ev_reader_enable(). */ int ev_reader_disable(ev_reader *r) { D(("disable reader fd %d", r->fd)); return ev_fd_disable(r->ev, ev_read, r->fd); } /** @brief Called from ev_run() for ev_reader_incomplete() */ static int reader_continuation(ev_source attribute((unused)) *ev, const attribute((unused)) struct timeval *now, void *u) { ev_reader *r = u; D(("reader continuation callback fd %d", r->fd)); /* If not at EOF turn the FD back on */ if(!r->eof) if(ev_fd_enable(r->ev, ev_read, r->fd)) return -1; /* We're already in a timeout callback so there's no reason we can't call the * user callback directly (compare ev_reader_enable()). */ return r->callback(ev, r, r->b.start, r->b.end - r->b.start, r->eof, r->u); } /** @brief Arrange another callback * @param r reader * @return 0 on success, non-0 on error * * Indicates that the reader can process more input but would like to yield to * other clients of the event loop. Input will be disabled but it will be * re-enabled on the next iteration of the event loop and the read callback * will be called again (even if no further bytes are available). 
*/ int ev_reader_incomplete(ev_reader *r) { if(ev_fd_disable(r->ev, ev_read, r->fd)) return -1; return ev_timeout(r->ev, 0, 0, reader_continuation, r); } static int reader_enabled(ev_source *ev, const attribute((unused)) struct timeval *now, void *u) { ev_reader *r = u; D(("reader enabled callback fd %d", r->fd)); return r->callback(ev, r, r->b.start, r->b.end - r->b.start, r->eof, r->u); } /** @brief Re-enable reading * @param r reader * @return 0 on success, non-0 on error * * If there is unconsumed data then you get a callback next time round the * event loop even if nothing new has been read. * * The idea is in your read callback you come across a line (or whatever) that * can't be processed immediately. So you set up processing and disable * reading with ev_reader_disable(). Later when you finish processing you * re-enable. You'll automatically get another callback directly from the * event loop (i.e. not from inside ev_reader_enable()) so you can handle the * next line (or whatever) if the whole thing has in fact already arrived. * * The difference between this process and calling ev_reader_incomplete() is * ev_reader_incomplete() deals with the case where you can process now but * would rather yield to other clients of the event loop, while using * ev_reader_disable() and ev_reader_enable() deals with the case where you * cannot process input yet because some other process is actually not * complete. */ int ev_reader_enable(ev_reader *r) { D(("enable reader fd %d", r->fd)); /* First if we're not at EOF then we re-enable reading */ if(!r->eof) if(ev_fd_enable(r->ev, ev_read, r->fd)) return -1; /* Arrange another callback next time round the event loop */ return ev_timeout(r->ev, 0, 0, reader_enabled, r); } /** @brief Tie a reader and a writer together * @param r Reader * @param w Writer * @return 0 on success, non-0 on error * * This function must be called if @p r and @p w share a file descritptor. 
*/
int ev_tie(ev_reader *r, ev_writer *w) {
  /* Neither side may already be tied */
  assert(r->writer == 0);
  assert(w->reader == 0);
  /* Cross-link the pair so each one can find its partner */
  w->reader = r;
  r->writer = w;
  return 0;
}

/*
Local Variables:
c-basic-offset:2
comment-column:40
fill-column:79
End:
*/