2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-30 22:15:20 +00:00

3137. [func] Improve hardware scalability by allowing multiple

worker threads to process incoming UDP packets.
			This can significantly increase query throughput
			on some systems.  [RT #22992]
This commit is contained in:
Evan Hunt
2011-07-28 04:04:37 +00:00
parent 8181aa6228
commit f07b2fccaf
17 changed files with 738 additions and 223 deletions

View File

@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: socket.c,v 1.340 2011/07/21 23:47:53 tbox Exp $ */
/* $Id: socket.c,v 1.341 2011/07/28 04:04:37 each Exp $ */
/*! \file */
@@ -334,7 +334,8 @@ struct isc__socket {
listener : 1, /* listener socket */
connected : 1,
connecting : 1, /* connect pending */
bound : 1; /* bound to local addr */
bound : 1, /* bound to local addr */
dupped : 1;
#ifdef ISC_NET_RECVOVERFLOW
unsigned char overflow; /* used for MSG_TRUNC fake */
@@ -428,6 +429,10 @@ static isc__socketmgr_t *socketmgr = NULL;
# define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
#endif
static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
isc_sockettype_t type,
isc_socket_t **socketp,
isc_socket_t *dup_socket);
static void send_recvdone_event(isc__socket_t *, isc_socketevent_t **);
static void send_senddone_event(isc__socket_t *, isc_socketevent_t **);
static void free_socket(isc__socket_t **);
@@ -2045,6 +2050,7 @@ allocate_socket(isc__socketmgr_t *manager, isc_sockettype_t type,
sock->manager = manager;
sock->type = type;
sock->fd = -1;
sock->dupped = 0;
sock->statsindex = NULL;
ISC_LINK_INIT(sock, link);
@@ -2221,7 +2227,8 @@ clear_bsdcompat(void) {
#endif
static isc_result_t
opensocket(isc__socketmgr_t *manager, isc__socket_t *sock) {
opensocket(isc__socketmgr_t *manager, isc__socket_t *sock,
isc__socket_t *dup_socket) {
char strbuf[ISC_STRERRORSIZE];
const char *err = "socket";
int tries = 0;
@@ -2234,22 +2241,28 @@ opensocket(isc__socketmgr_t *manager, isc__socket_t *sock) {
#endif
again:
switch (sock->type) {
case isc_sockettype_udp:
sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP);
break;
case isc_sockettype_tcp:
sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
break;
case isc_sockettype_unix:
sock->fd = socket(sock->pf, SOCK_STREAM, 0);
break;
case isc_sockettype_fdwatch:
/*
* We should not be called for isc_sockettype_fdwatch sockets.
*/
INSIST(0);
break;
if (dup_socket == NULL) {
switch (sock->type) {
case isc_sockettype_udp:
sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP);
break;
case isc_sockettype_tcp:
sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
break;
case isc_sockettype_unix:
sock->fd = socket(sock->pf, SOCK_STREAM, 0);
break;
case isc_sockettype_fdwatch:
/*
* We should not be called for isc_sockettype_fdwatch
* sockets.
*/
INSIST(0);
break;
}
} else {
sock->fd = dup(dup_socket->fd);
sock->dupped = 1;
}
if (sock->fd == -1 && errno == EINTR && tries++ < 42)
goto again;
@@ -2326,6 +2339,9 @@ opensocket(isc__socketmgr_t *manager, isc__socket_t *sock) {
}
}
if (dup_socket != NULL)
goto setup_done;
if (make_nonblock(sock->fd) != ISC_R_SUCCESS) {
(void)close(sock->fd);
return (ISC_R_UNEXPECTED);
@@ -2509,20 +2525,21 @@ opensocket(isc__socketmgr_t *manager, isc__socket_t *sock) {
}
#endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
setup_done:
inc_stats(manager->stats, sock->statsindex[STATID_OPEN]);
return (ISC_R_SUCCESS);
}
/*%
* Create a new 'type' socket managed by 'manager'. Events
* will be posted to 'task' and when dispatched 'action' will be
* called with 'arg' as the arg value. The new socket is returned
* in 'socketp'.
/*
* Create a 'type' socket or duplicate an existing socket, managed
* by 'manager'. Events will be posted to 'task' and when dispatched
* 'action' will be called with 'arg' as the arg value. The new
* socket is returned in 'socketp'.
*/
ISC_SOCKETFUNC_SCOPE isc_result_t
isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type,
isc_socket_t **socketp)
static isc_result_t
socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type,
isc_socket_t **socketp, isc_socket_t *dup_socket)
{
isc__socket_t *sock = NULL;
isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0;
@@ -2554,7 +2571,8 @@ isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type,
}
sock->pf = pf;
result = opensocket(manager, sock);
result = opensocket(manager, sock, (isc__socket_t *)dup_socket);
if (result != ISC_R_SUCCESS) {
inc_stats(manager->stats, sock->statsindex[STATID_OPENFAIL]);
free_socket(&sock);
@@ -2589,11 +2607,40 @@ isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type,
UNLOCK(&manager->lock);
socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
ISC_MSG_CREATED, "created");
ISC_MSG_CREATED, dup_socket == NULL ? "dupped" : "created");
return (ISC_R_SUCCESS);
}
/*%
* Create a new 'type' socket managed by 'manager'. Events
* will be posted to 'task' and when dispatched 'action' will be
* called with 'arg' as the arg value. The new socket is returned
* in 'socketp'.
*/
ISC_SOCKETFUNC_SCOPE isc_result_t
isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type,
isc_socket_t **socketp)
{
return (socket_create(manager0, pf, type, socketp, NULL));
}
/*%
* Duplicate an existing socket. The new socket is returned
* in 'socketp'.
*/
ISC_SOCKETFUNC_SCOPE isc_result_t
isc__socket_dup(isc_socket_t *sock0, isc_socket_t **socketp) {
isc__socket_t *sock = (isc__socket_t *)sock0;
REQUIRE(VALID_SOCKET(sock));
REQUIRE(socketp != NULL && *socketp == NULL);
return (socket_create((isc_socketmgr_t *) sock->manager,
sock->pf, sock->type, socketp,
sock0));
}
#ifdef BIND9
ISC_SOCKETFUNC_SCOPE isc_result_t
isc__socket_open(isc_socket_t *sock0) {
@@ -2612,7 +2659,7 @@ isc__socket_open(isc_socket_t *sock0) {
*/
REQUIRE(sock->fd == -1);
result = opensocket(sock->manager, sock);
result = opensocket(sock->manager, sock, NULL);
if (result != ISC_R_SUCCESS)
sock->fd = -1;
@@ -2792,6 +2839,7 @@ isc__socket_close(isc_socket_t *sock0) {
int fd;
isc__socketmgr_t *manager;
fflush(stdout);
REQUIRE(VALID_SOCKET(sock));
LOCK(&sock->lock);
@@ -2812,6 +2860,7 @@ isc__socket_close(isc_socket_t *sock0) {
manager = sock->manager;
fd = sock->fd;
sock->fd = -1;
sock->dupped = 0;
memset(sock->name, 0, sizeof(sock->name));
sock->tag = NULL;
sock->listener = 0;
@@ -4977,48 +5026,49 @@ isc__socket_bind(isc_socket_t *sock0, isc_sockaddr_t *sockaddr,
UNLOCK(&sock->lock);
return (ISC_R_FAMILYMISMATCH);
}
/*
* Only set SO_REUSEADDR when we want a specific port.
*/
if (!sock->dupped) {
/*
* Only set SO_REUSEADDR when we want a specific port.
*/
#ifdef AF_UNIX
if (sock->pf == AF_UNIX)
goto bind_socket;
if (sock->pf == AF_UNIX)
goto bind_socket;
#endif
if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
sizeof(on)) < 0) {
UNEXPECTED_ERROR(__FILE__, __LINE__,
"setsockopt(%d) %s", sock->fd,
isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
ISC_MSG_FAILED, "failed"));
/* Press on... */
}
if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
sizeof(on)) < 0) {
UNEXPECTED_ERROR(__FILE__, __LINE__,
"setsockopt(%d) %s", sock->fd,
isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
ISC_MSG_FAILED, "failed"));
/* Press on... */
}
#ifdef AF_UNIX
bind_socket:
bind_socket:
#endif
if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
inc_stats(sock->manager->stats,
sock->statsindex[STATID_BINDFAIL]);
if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
inc_stats(sock->manager->stats,
sock->statsindex[STATID_BINDFAIL]);
UNLOCK(&sock->lock);
switch (errno) {
case EACCES:
return (ISC_R_NOPERM);
case EADDRNOTAVAIL:
return (ISC_R_ADDRNOTAVAIL);
case EADDRINUSE:
return (ISC_R_ADDRINUSE);
case EINVAL:
return (ISC_R_BOUND);
default:
isc__strerror(errno, strbuf, sizeof(strbuf));
UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
strbuf);
return (ISC_R_UNEXPECTED);
UNLOCK(&sock->lock);
switch (errno) {
case EACCES:
return (ISC_R_NOPERM);
case EADDRNOTAVAIL:
return (ISC_R_ADDRNOTAVAIL);
case EADDRINUSE:
return (ISC_R_ADDRINUSE);
case EINVAL:
return (ISC_R_BOUND);
default:
isc__strerror(errno, strbuf, sizeof(strbuf));
UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
strbuf);
return (ISC_R_UNEXPECTED);
}
}
}
socket_log(sock, sockaddr, TRACE,
isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
sock->bound = 1;
@@ -5826,6 +5876,13 @@ isc__socket_register() {
}
#endif
int
isc_socket_getfd(isc_socket_t *socket0) {
isc__socket_t *socket = (isc__socket_t *)socket0;
return ((short) socket->fd);
}
#if defined(HAVE_LIBXML2) && defined(BIND9)
static const char *