diff --git a/CHANGES b/CHANGES index 639d837dd1..ab28a37ab9 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ +2489. [port] solaris: Work around a Solaris kernel bug in + /dev/poll: + http://bugs.opensolaris.org/view_bug.do?bug_id=6724237 + Define ISC_SOCKET_USE_POLLWATCH at build time to enable + this workaround. [RT #18870] + 2488. [func] Added a tool, dnssec-dsfromkey, to generate DS records from keyset and .key files. [RT #18694] diff --git a/README b/README index 808d5e9e6e..4c0f2abc27 100644 --- a/README +++ b/README @@ -457,6 +457,10 @@ Building To create the default pid files in ${localstatedir}/run rather than ${localstatedir}/run/{named,lwresd}/ set. -DNS_RUN_PID_DIR=0 + To enable the workaround for a Solaris kernel bug in /dev/poll, define + -DISC_SOCKET_USE_POLLWATCH=1 + The watch timeout is also configurable, e.g., + -DISC_SOCKET_POLLWATCH_TIMEOUT=20 LDFLAGS Linker flags. Defaults to empty string. diff --git a/lib/isc/unix/socket.c b/lib/isc/unix/socket.c index 67f34c8d31..4b5bd8344d 100644 --- a/lib/isc/unix/socket.c +++ b/lib/isc/unix/socket.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket.c,v 1.304 2008/10/17 21:49:23 jinmei Exp $ */ +/* $Id: socket.c,v 1.305 2008/11/08 22:35:12 jinmei Exp $ */ /*! \file */ @@ -148,6 +148,35 @@ struct isc_socketwait { #endif /* __APPLE__ */ #endif /* USE_SELECT */ +#ifdef ISC_SOCKET_USE_POLLWATCH +/*% + * If this macro is defined, enable a workaround for a Solaris /dev/poll kernel + * bug: DP_POLL ioctl could keep sleeping even if socket I/O is possible for + * some of the specified FDs. The idea is based on the observation that it's + * likely for a busy server to keep receiving packets. It specifically works + * as follows: the socket watcher is first initialized with the state of + * "poll_idle". While it's in the idle state it keeps sleeping until a socket + * event occurs. 
When it wakes up for a socket I/O event, it moves to the + * poll_active state, and sets the poll timeout to a short period + * (ISC_SOCKET_POLLWATCH_TIMEOUT msec). If a timeout occurs in this state, the + * watcher goes to the poll_checking state with the same timeout period. + * In this state, the watcher tries to detect whether this is a break + * during intermittent events or whether the kernel bug has been triggered. If the next + * poll reports an event within the short period, the previous timeout is + * likely to have been caused by the kernel bug, and so the watcher goes back to the active state. + * Otherwise, it moves to the idle state again. + * + * It's not clear whether this is a thread-related bug, but since we've only + * seen this with threads, this workaround is used only when threads are enabled. + */ + +typedef enum { poll_idle, poll_active, poll_checking } pollstate_t; + +#ifndef ISC_SOCKET_POLLWATCH_TIMEOUT +#define ISC_SOCKET_POLLWATCH_TIMEOUT 10 +#endif /* ISC_SOCKET_POLLWATCH_TIMEOUT */ +#endif /* ISC_SOCKET_USE_POLLWATCH */ + /*% * Size of per-FD lock buckets. */ @@ -3230,6 +3259,9 @@ watcher(void *uap) { int maxfd; #endif char strbuf[ISC_STRERRORSIZE]; +#ifdef ISC_SOCKET_USE_POLLWATCH + pollstate_t pollstate = poll_idle; +#endif /* * Get the control fd here. This will never change. 
@@ -3247,7 +3279,14 @@ watcher(void *uap) { #elif defined(USE_DEVPOLL) dvp.dp_fds = manager->events; dvp.dp_nfds = manager->nevents; +#ifndef ISC_SOCKET_USE_POLLWATCH dvp.dp_timeout = -1; +#else + if (pollstate == poll_idle) + dvp.dp_timeout = -1; + else + dvp.dp_timeout = ISC_SOCKET_POLLWATCH_TIMEOUT; +#endif /* ISC_SOCKET_USE_POLLWATCH */ cc = ioctl(manager->devpoll_fd, DP_POLL, &dvp); #elif defined(USE_SELECT) LOCK(&manager->lock); @@ -3271,6 +3310,33 @@ watcher(void *uap) { ISC_MSG_FAILED, "failed"), strbuf); } + +#if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH) + /* cc == 0: the poll timed out; cc > 0: events were reported. */ + if (cc == 0) { + if (pollstate == poll_active) + pollstate = poll_checking; + else if (pollstate == poll_checking) + pollstate = poll_idle; + } else if (cc > 0) { + if (pollstate == poll_checking) { + /* + * XXX: We'd like to use a more + * verbose log level as it's actually an + * unexpected event, but the kernel bug + * reportedly happens pretty frequently + * (and it can also be a false positive) + * so it would be just too noisy. + */ + manager_log(manager, + ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, + ISC_LOG_DEBUG(1), + "unexpected POLL timeout"); + } + pollstate = poll_active; + } +#endif } while (cc < 0); #if defined(USE_KQUEUE) || defined (USE_EPOLL) || defined (USE_DEVPOLL)