diff --git a/CHANGES b/CHANGES index b26c348341..4baa0b628a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ +3137. [func] Improve hardware scalability by allowing multiple + worker threads to process incoming UDP packets. + This can significantly increase query throughput + on some systems. [RT #22992] + 3136. [func] Add RFC 1918 reverse zones to the list of built-in empty zones switched on by the 'empty-zones-enable' option. [RT #24990] diff --git a/bin/named/client.c b/bin/named/client.c index 6e8ddd1b51..0e26afa7c2 100644 --- a/bin/named/client.c +++ b/bin/named/client.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: client.c,v 1.273 2011/05/05 23:44:52 marka Exp $ */ +/* $Id: client.c,v 1.274 2011/07/28 04:04:36 each Exp $ */ #include @@ -2530,8 +2530,10 @@ ns_clientmgr_createclients(ns_clientmgr_t *manager, unsigned int n, ns_interface_t *ifp, isc_boolean_t tcp) { isc_result_t result = ISC_R_SUCCESS; + isc_boolean_t success = ISC_FALSE; unsigned int i; ns_client_t *client; + unsigned int disp; REQUIRE(VALID_MANAGER(manager)); REQUIRE(n > 0); @@ -2546,61 +2548,68 @@ ns_clientmgr_createclients(ns_clientmgr_t *manager, unsigned int n, LOCK(&manager->lock); - for (i = 0; i < n; i++) { - isc_event_t *ev; - /* - * Allocate a client. First try to get a recycled one; - * if that fails, make a new one. - */ - client = NULL; - if (!ns_g_clienttest) - client = ISC_LIST_HEAD(manager->inactive); - if (client != NULL) { - MTRACE("recycle"); - ISC_LIST_UNLINK(manager->inactive, client, link); - client->list = NULL; - } else { - MTRACE("create new"); - result = client_create(manager, &client); - if (result != ISC_R_SUCCESS) - break; + for (disp = 0; disp < n; disp++) { + for (i = 0; i < n; i++) { + isc_event_t *ev; + + /* + * Allocate a client. First try to get a recycled one; + * if that fails, make a new one. 
+ */ + client = NULL; + if (!ns_g_clienttest) + client = ISC_LIST_HEAD(manager->inactive); + if (client != NULL) { + MTRACE("recycle"); + ISC_LIST_UNLINK(manager->inactive, client, + link); + client->list = NULL; + } else { + MTRACE("create new"); + result = client_create(manager, &client); + if (result != ISC_R_SUCCESS) + break; + } + + ns_interface_attach(ifp, &client->interface); + client->state = NS_CLIENTSTATE_READY; + INSIST(client->recursionquota == NULL); + + if (tcp) { + client->attributes |= NS_CLIENTATTR_TCP; + isc_socket_attach(ifp->tcpsocket, + &client->tcplistener); + } else { + isc_socket_t *sock; + + dns_dispatch_attach(ifp->udpdispatch[disp], + &client->dispatch); + sock = dns_dispatch_getsocket(client->dispatch); + isc_socket_attach(sock, &client->udpsocket); + } + + client->manager = manager; + ISC_LIST_APPEND(manager->active, client, link); + client->list = &manager->active; + + INSIST(client->nctls == 0); + client->nctls++; + ev = &client->ctlevent; + isc_task_send(client->task, &ev); + + success = ISC_TRUE; } - - ns_interface_attach(ifp, &client->interface); - client->state = NS_CLIENTSTATE_READY; - INSIST(client->recursionquota == NULL); - - if (tcp) { - client->attributes |= NS_CLIENTATTR_TCP; - isc_socket_attach(ifp->tcpsocket, - &client->tcplistener); - } else { - isc_socket_t *sock; - - dns_dispatch_attach(ifp->udpdispatch, - &client->dispatch); - sock = dns_dispatch_getsocket(client->dispatch); - isc_socket_attach(sock, &client->udpsocket); - } - client->manager = manager; - ISC_LIST_APPEND(manager->active, client, link); - client->list = &manager->active; - - INSIST(client->nctls == 0); - client->nctls++; - ev = &client->ctlevent; - isc_task_send(client->task, &ev); - } - if (i != 0) { - /* - * We managed to create at least one client, so we - * declare victory. - */ - result = ISC_R_SUCCESS; } UNLOCK(&manager->lock); + /* + * If managed to create at least one client for + * one dispatch, we declare victory. 
+ */ + if (success) + return (ISC_R_SUCCESS); + return (result); } diff --git a/bin/named/include/named/interfacemgr.h b/bin/named/include/named/interfacemgr.h index 2724c393cd..2821984cc8 100644 --- a/bin/named/include/named/interfacemgr.h +++ b/bin/named/include/named/interfacemgr.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: interfacemgr.h,v 1.33 2007/06/19 23:46:59 tbox Exp $ */ +/* $Id: interfacemgr.h,v 1.34 2011/07/28 04:04:36 each Exp $ */ #ifndef NAMED_INTERFACEMGR_H #define NAMED_INTERFACEMGR_H 1 @@ -65,7 +65,8 @@ #define NS_INTERFACE_VALID(t) ISC_MAGIC_VALID(t, IFACE_MAGIC) #define NS_INTERFACEFLAG_ANYADDR 0x01U /*%< bound to "any" address */ - +#define MAX_UDP_DISPATCH 128 /*%< Maximum number of UDP dispatchers + to start per interface */ /*% The nameserver interface structure */ struct ns_interface { unsigned int magic; /*%< Magic number. */ @@ -76,11 +77,13 @@ struct ns_interface { isc_sockaddr_t addr; /*%< Address and port. */ unsigned int flags; /*%< Interface characteristics */ char name[32]; /*%< Null terminated. */ - dns_dispatch_t * udpdispatch; /*%< UDP dispatcher. */ + dns_dispatch_t * udpdispatch[MAX_UDP_DISPATCH]; + /*%< UDP dispatchers. */ isc_socket_t * tcpsocket; /*%< TCP socket. */ int ntcptarget; /*%< Desired number of concurrent TCP accepts */ int ntcpcurrent; /*%< Current ditto, locked */ + int nudpdispatch; /*%< Number of UDP dispatches */ ns_clientmgr_t * clientmgr; /*%< Client manager. */ ISC_LINK(ns_interface_t) link; }; diff --git a/bin/named/interfacemgr.c b/bin/named/interfacemgr.c index 782aacd52e..7149604039 100644 --- a/bin/named/interfacemgr.c +++ b/bin/named/interfacemgr.c @@ -15,13 +15,14 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: interfacemgr.c,v 1.97 2011/03/12 04:59:46 tbox Exp $ */ +/* $Id: interfacemgr.c,v 1.98 2011/07/28 04:04:36 each Exp $ */ /*! 
\file */ #include #include +#include #include #include #include @@ -195,6 +196,7 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, strncpy(ifp->name, name, sizeof(ifp->name)); ifp->name[sizeof(ifp->name)-1] = '\0'; ifp->clientmgr = NULL; + int disp; result = isc_mutex_init(&ifp->lock); if (result != ISC_R_SUCCESS) @@ -210,7 +212,8 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, goto clientmgr_create_failure; } - ifp->udpdispatch = NULL; + for (disp = 0; disp < MAX_UDP_DISPATCH; disp++) + ifp->udpdispatch[disp] = NULL; ifp->tcpsocket = NULL; /* @@ -221,6 +224,7 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, */ ifp->ntcptarget = 1; ifp->ntcpcurrent = 0; + ifp->nudpdispatch = 0; ISC_LINK_INIT(ifp, link); @@ -247,6 +251,7 @@ ns_interface_listenudp(ns_interface_t *ifp) { isc_result_t result; unsigned int attrs; unsigned int attrmask; + int disp, i; attrs = 0; attrs |= DNS_DISPATCHATTR_UDP; @@ -258,15 +263,25 @@ ns_interface_listenudp(ns_interface_t *ifp) { attrmask = 0; attrmask |= DNS_DISPATCHATTR_UDP | DNS_DISPATCHATTR_TCP; attrmask |= DNS_DISPATCHATTR_IPV4 | DNS_DISPATCHATTR_IPV6; - result = dns_dispatch_getudp(ifp->mgr->dispatchmgr, ns_g_socketmgr, - ns_g_taskmgr, &ifp->addr, - 4096, 1000, 32768, 8219, 8237, - attrs, attrmask, &ifp->udpdispatch); - if (result != ISC_R_SUCCESS) { - isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, - "could not listen on UDP socket: %s", - isc_result_totext(result)); - goto udp_dispatch_failure; + + ifp->nudpdispatch = ISC_MIN(isc_os_ncpus(), MAX_UDP_DISPATCH); + for (disp = 0; disp < ifp->nudpdispatch; disp++) { + result = dns_dispatch_getudp_dup(ifp->mgr->dispatchmgr, + ns_g_socketmgr, + ns_g_taskmgr, &ifp->addr, + 4096, 1000, 32768, 8219, 8237, + attrs, attrmask, + &ifp->udpdispatch[disp], + disp == 0 + ? 
NULL + : ifp->udpdispatch[0]); + if (result != ISC_R_SUCCESS) { + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, + "could not listen on UDP socket: %s", + isc_result_totext(result)); + goto udp_dispatch_failure; + } + } result = ns_clientmgr_createclients(ifp->clientmgr, ns_g_cpus, @@ -277,12 +292,17 @@ ns_interface_listenudp(ns_interface_t *ifp) { isc_result_totext(result)); goto addtodispatch_failure; } + return (ISC_R_SUCCESS); addtodispatch_failure: - dns_dispatch_changeattributes(ifp->udpdispatch, 0, - DNS_DISPATCHATTR_NOLISTEN); - dns_dispatch_detach(&ifp->udpdispatch); + for (i = disp - 1; i >= 0; i--) { + dns_dispatch_changeattributes(ifp->udpdispatch[i], 0, + DNS_DISPATCHATTR_NOLISTEN); + dns_dispatch_detach(&(ifp->udpdispatch[i])); + } + ifp->nudpdispatch = 0; + udp_dispatch_failure: return (result); } @@ -397,14 +417,16 @@ static void ns_interface_destroy(ns_interface_t *ifp) { isc_mem_t *mctx = ifp->mgr->mctx; REQUIRE(NS_INTERFACE_VALID(ifp)); + int disp; ns_interface_shutdown(ifp); - if (ifp->udpdispatch != NULL) { - dns_dispatch_changeattributes(ifp->udpdispatch, 0, - DNS_DISPATCHATTR_NOLISTEN); - dns_dispatch_detach(&ifp->udpdispatch); - } + for (disp = ifp->nudpdispatch - 1; disp >= 0; disp--) + if (ifp->udpdispatch[disp] != NULL) { + dns_dispatch_changeattributes(ifp->udpdispatch[disp], 0, + DNS_DISPATCHATTR_NOLISTEN); + dns_dispatch_detach(&(ifp->udpdispatch[disp])); + } if (ifp->tcpsocket != NULL) isc_socket_detach(&ifp->tcpsocket); diff --git a/configure.in b/configure.in index 6d55410e87..9a9f3e4858 100644 --- a/configure.in +++ b/configure.in @@ -18,7 +18,7 @@ AC_DIVERT_PUSH(1)dnl esyscmd([sed "s/^/# /" COPYRIGHT])dnl AC_DIVERT_POP()dnl -AC_REVISION($Revision: 1.528 $) +AC_REVISION($Revision: 1.529 $) AC_INIT(lib/dns/name.c) AC_PREREQ(2.59) @@ -2552,7 +2552,7 @@ yes) esac AC_SUBST(ISC_PLATFORM_HAVEIFNAMETOINDEX) -AC_CHECK_FUNCS(nanosleep) +AC_CHECK_FUNCS(nanosleep usleep) # # Machine architecture dependent features diff --git 
a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 1e5f415b44..ba3eefe013 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: dispatch.c,v 1.172 2011/04/06 10:27:16 marka Exp $ */ +/* $Id: dispatch.c,v 1.173 2011/07/28 04:04:36 each Exp $ */ /*! \file */ @@ -312,14 +312,16 @@ static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr, - isc_socket_t **sockp); + isc_socket_t **sockp, + isc_socket_t *dup_socket); static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, unsigned int maxrequests, unsigned int attributes, - dns_dispatch_t **dispp); + dns_dispatch_t **dispp, + isc_socket_t *dup_socket); static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr); static void destroy_mgr(dns_dispatchmgr_t **mgrp); static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, @@ -327,7 +329,8 @@ static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, isc_boolean_t needaddrtable); static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, - unsigned int options, isc_socket_t **sockp); + unsigned int options, isc_socket_t **sockp, + isc_socket_t *dup_socket); static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, isc_sockaddr_t *sockaddrp); @@ -902,7 +905,8 @@ get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest, portentry = port_search(disp, port); if (portentry != NULL) bindoptions |= ISC_SOCKET_REUSEADDRESS; - result = open_socket(sockmgr, &localaddr, bindoptions, &sock); + result = open_socket(sockmgr, &localaddr, bindoptions, &sock, + NULL); if (result == ISC_R_SUCCESS) { if (portentry == NULL) { portentry = new_portentry(disp, port); @@ -1787,19 +1791,14 @@ destroy_mgr(dns_dispatchmgr_t **mgrp) { static 
isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, - unsigned int options, isc_socket_t **sockp) + unsigned int options, isc_socket_t **sockp, + isc_socket_t *dup_socket) { isc_socket_t *sock; isc_result_t result; sock = *sockp; - if (sock == NULL) { - result = isc_socket_create(mgr, isc_sockaddr_pf(local), - isc_sockettype_udp, &sock); - if (result != ISC_R_SUCCESS) - return (result); - isc_socket_setname(sock, "dispatcher", NULL); - } else { + if (sock != NULL) { #ifdef BIND9 result = isc_socket_open(sock); if (result != ISC_R_SUCCESS) @@ -1807,8 +1806,19 @@ open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, #else INSIST(0); #endif + } else if (dup_socket != NULL) { + result = isc_socket_dup(dup_socket, &sock); + if (result != ISC_R_SUCCESS) + return (result); + } else { + result = isc_socket_create(mgr, isc_sockaddr_pf(local), + isc_sockettype_udp, &sock); + if (result != ISC_R_SUCCESS) + return (result); } + isc_socket_setname(sock, "dispatcher", NULL); + #ifndef ISC_ALLOW_MAPPED isc_socket_ipv6only(sock, ISC_TRUE); #endif @@ -2646,13 +2656,13 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, } isc_result_t -dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, +dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, unsigned int buffersize, unsigned int maxbuffers, unsigned int maxrequests, unsigned int buckets, unsigned int increment, unsigned int attributes, unsigned int mask, - dns_dispatch_t **dispp) + dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch) { isc_result_t result; dns_dispatch_t *disp = NULL; @@ -2683,28 +2693,31 @@ dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, /* * See if we have a dispatcher that matches. 
*/ - result = dispatch_find(mgr, localaddr, attributes, mask, &disp); - if (result == ISC_R_SUCCESS) { - disp->refcount++; + if (dup_dispatch == NULL) { + result = dispatch_find(mgr, localaddr, attributes, mask, &disp); + if (result == ISC_R_SUCCESS) { + disp->refcount++; - if (disp->maxrequests < maxrequests) - disp->maxrequests = maxrequests; + if (disp->maxrequests < maxrequests) + disp->maxrequests = maxrequests; - if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 && - (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) - { - disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; - if (disp->recv_pending != 0) - isc_socket_cancel(disp->socket, disp->task[0], - ISC_SOCKCANCEL_RECV); + if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 + && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) + { + disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; + if (disp->recv_pending != 0) + isc_socket_cancel(disp->socket, + disp->task[0], + ISC_SOCKCANCEL_RECV); + } + + UNLOCK(&disp->lock); + UNLOCK(&mgr->lock); + + *dispp = disp; + + return (ISC_R_SUCCESS); } - - UNLOCK(&disp->lock); - UNLOCK(&mgr->lock); - - *dispp = disp; - - return (ISC_R_SUCCESS); } createudp: @@ -2712,7 +2725,11 @@ dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, * Nope, create one. */ result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr, - maxrequests, attributes, &disp); + maxrequests, attributes, &disp, + dup_dispatch == NULL + ? 
NULL + : dup_dispatch->socket); + if (result != ISC_R_SUCCESS) { UNLOCK(&mgr->lock); return (result); @@ -2720,9 +2737,24 @@ dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, UNLOCK(&mgr->lock); *dispp = disp; + return (ISC_R_SUCCESS); } +isc_result_t +dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, + isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, + unsigned int buffersize, + unsigned int maxbuffers, unsigned int maxrequests, + unsigned int buckets, unsigned int increment, + unsigned int attributes, unsigned int mask, + dns_dispatch_t **dispp) +{ + return dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr, + buffersize, maxbuffers, maxrequests, buckets, increment, + attributes, mask, dispp, NULL); +} + /* * mgr should be locked. */ @@ -2734,7 +2766,7 @@ dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr, - isc_socket_t **sockp) + isc_socket_t **sockp, isc_socket_t *dup_socket) { unsigned int i, j; isc_socket_t *held[DNS_DISPATCH_HELD]; @@ -2774,7 +2806,7 @@ get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, nports)]; isc_sockaddr_setport(&localaddr_bound, prt); result = open_socket(sockmgr, &localaddr_bound, - 0, &sock); + 0, &sock, NULL); if (result == ISC_R_SUCCESS || result != ISC_R_ADDRINUSE) { disp->localport = prt; @@ -2790,7 +2822,8 @@ get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, } else { /* Allow to reuse address for non-random ports. 
*/ result = open_socket(sockmgr, localaddr, - ISC_SOCKET_REUSEADDRESS, &sock); + ISC_SOCKET_REUSEADDRESS, &sock, + dup_socket); if (result == ISC_R_SUCCESS) *sockp = sock; @@ -2802,7 +2835,7 @@ get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, i = 0; for (j = 0; j < 0xffffU; j++) { - result = open_socket(sockmgr, localaddr, 0, &sock); + result = open_socket(sockmgr, localaddr, 0, &sock, NULL); if (result != ISC_R_SUCCESS) goto end; else if (!anyport) @@ -2841,7 +2874,8 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr, unsigned int maxrequests, unsigned int attributes, - dns_dispatch_t **dispp) + dns_dispatch_t **dispp, + isc_socket_t *dup_socket) { isc_result_t result; dns_dispatch_t *disp; @@ -2857,9 +2891,21 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, return (result); if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) { - result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock); + result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock, + dup_socket); if (result != ISC_R_SUCCESS) goto deallocate_dispatch; + + if (isc_log_wouldlog(dns_lctx, 90)) { + char addrbuf[ISC_SOCKADDR_FORMATSIZE]; + + isc_sockaddr_format(localaddr, addrbuf, + ISC_SOCKADDR_FORMATSIZE); + mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created" + " UDP dispatch for %s with socket fd %d\n", + addrbuf, isc_socket_getfd(sock)); + } + } else { isc_sockaddr_t sa_any; @@ -2871,7 +2917,7 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, */ isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr)); if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) { - result = open_socket(sockmgr, localaddr, 0, &sock); + result = open_socket(sockmgr, localaddr, 0, &sock, NULL); if (sock != NULL) isc_socket_detach(&sock); if (result != ISC_R_SUCCESS) @@ -2938,6 +2984,7 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, dispatch_log(disp, LVL(90), "created socket %p", disp->socket); 
*dispp = disp; + return (result); /* diff --git a/lib/dns/include/dns/dispatch.h b/lib/dns/include/dns/dispatch.h index b9f68426be..b0c59e54ba 100644 --- a/lib/dns/include/dns/dispatch.h +++ b/lib/dns/include/dns/dispatch.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: dispatch.h,v 1.62 2009/01/27 23:47:54 tbox Exp $ */ +/* $Id: dispatch.h,v 1.63 2011/07/28 04:04:37 each Exp $ */ #ifndef DNS_DISPATCH_H #define DNS_DISPATCH_H 1 @@ -245,6 +245,15 @@ dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, unsigned int buckets, unsigned int increment, unsigned int attributes, unsigned int mask, dns_dispatch_t **dispp); + +isc_result_t +dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, + isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, + unsigned int buffersize, + unsigned int maxbuffers, unsigned int maxrequests, + unsigned int buckets, unsigned int increment, + unsigned int attributes, unsigned int mask, + dns_dispatch_t **dispp, dns_dispatch_t *dup); /*%< * Attach to existing dns_dispatch_t if one is found with dns_dispatchmgr_find, * otherwise create a new UDP dispatch. diff --git a/lib/isc/include/isc/namespace.h b/lib/isc/include/isc/namespace.h index bc5ab60786..427e332a18 100644 --- a/lib/isc/include/isc/namespace.h +++ b/lib/isc/include/isc/namespace.h @@ -14,7 +14,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: namespace.h,v 1.9 2010/12/04 13:25:59 marka Exp $ */ +/* $Id: namespace.h,v 1.10 2011/07/28 04:04:37 each Exp $ */ #ifndef ISCAPI_NAMESPACE_H #define ISCAPI_NAMESPACE_H 1 @@ -88,6 +88,7 @@ #define isc_mempool_getfillcount isc__mempool_getfillcount #define isc_socket_create isc__socket_create +#define isc_socket_dup isc__socket_dup #define isc_socket_attach isc__socket_attach #define isc_socket_detach isc__socket_detach #define isc_socketmgr_create isc__socketmgr_create diff --git a/lib/isc/include/isc/socket.h b/lib/isc/include/isc/socket.h index c4ab0e2959..0307b8082d 100644 --- a/lib/isc/include/isc/socket.h +++ b/lib/isc/include/isc/socket.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket.h,v 1.94 2009/10/01 01:30:01 sar Exp $ */ +/* $Id: socket.h,v 1.95 2011/07/28 04:04:37 each Exp $ */ #ifndef ISC_SOCKET_H #define ISC_SOCKET_H 1 @@ -449,6 +449,12 @@ isc_socket_create(isc_socketmgr_t *manager, *\li #ISC_R_UNEXPECTED */ +isc_result_t +isc_socket_dup(isc_socket_t *sock0, isc_socket_t **socketp); +/*%< + * Duplicate an existing socket, reusing its file descriptor. + */ + void isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how); @@ -1102,6 +1108,11 @@ void *isc_socket_gettag(isc_socket_t *socket); * Get the tag associated with a socket, if any. */ +int isc_socket_getfd(isc_socket_t *socket); +/*%< + * Get the file descriptor associated with a socket + */ + void isc__socketmgr_setreserved(isc_socketmgr_t *mgr, isc_uint32_t); /*%< diff --git a/lib/isc/tests/Makefile.in b/lib/isc/tests/Makefile.in index 15082373d3..063168d3ab 100644 --- a/lib/isc/tests/Makefile.in +++ b/lib/isc/tests/Makefile.in @@ -12,7 +12,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. 
-# $Id: Makefile.in,v 1.2 2011/07/06 05:05:51 each Exp $ +# $Id: Makefile.in,v 1.3 2011/07/28 04:04:37 each Exp $ srcdir = @srcdir@ VPATH = @srcdir@ @@ -35,10 +35,10 @@ ISCDEPLIBS = ../libisc.@A@ LIBS = @LIBS@ @ATFLIBS@ OBJS = isctest.@O@ -SRCS = isctest.c taskpool_test.c +SRCS = isctest.c taskpool_test.c socket_test.c SUBDIRS = -TARGETS = taskpool_test@EXEEXT@ +TARGETS = taskpool_test@EXEEXT@ socket_test@EXEEXT@ @BIND9_MAKE_RULES@ @@ -46,5 +46,9 @@ taskpool_test@EXEEXT@: taskpool_test.@O@ isctest.@O@ ${ISCDEPLIBS} ${LIBTOOL_MODE_LINK} ${PURIFY} ${CC} ${CFLAGS} ${LDFLAGS} -o $@ \ taskpool_test.@O@ isctest.@O@ ${ISCLIBS} ${LIBS} +socket_test@EXEEXT@: socket_test.@O@ isctest.@O@ ${ISCDEPLIBS} + ${LIBTOOL_MODE_LINK} ${PURIFY} ${CC} ${CFLAGS} ${LDFLAGS} -o $@ \ + socket_test.@O@ isctest.@O@ ${ISCLIBS} ${LIBS} + clean distclean:: rm -f ${TARGETS} diff --git a/lib/isc/tests/isctest.c b/lib/isc/tests/isctest.c index 6300b52161..6d0eecfe7f 100644 --- a/lib/isc/tests/isctest.c +++ b/lib/isc/tests/isctest.c @@ -14,7 +14,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: isctest.c,v 1.2 2011/07/06 05:05:52 each Exp $ */ +/* $Id: isctest.c,v 1.3 2011/07/28 04:04:37 each Exp $ */ /*! 
\file */ @@ -26,7 +26,10 @@ #include #include #include +#include #include +#include +#include #include #include "isctest.h" @@ -35,6 +38,8 @@ isc_mem_t *mctx = NULL; isc_entropy_t *ectx = NULL; isc_log_t *lctx = NULL; isc_taskmgr_t *taskmgr = NULL; +isc_timermgr_t *timermgr = NULL; +isc_socketmgr_t *socketmgr = NULL; int ncpus; static isc_boolean_t hash_active = ISC_FALSE; @@ -54,8 +59,37 @@ static isc_logcategory_t categories[] = { { NULL, 0 } }; +static void +cleanup_managers() { + if (socketmgr != NULL) + isc_socketmgr_destroy(&socketmgr); + if (taskmgr != NULL) + isc_taskmgr_destroy(&taskmgr); + if (timermgr != NULL) + isc_timermgr_destroy(&timermgr); +} + +static isc_result_t +create_managers() { + isc_result_t result; +#ifdef ISC_PLATFORM_USETHREADS + ncpus = isc_os_ncpus(); +#else + ncpus = 1; +#endif + + CHECK(isc_taskmgr_create(mctx, ncpus, 0, &taskmgr)); + CHECK(isc_timermgr_create(mctx, &timermgr)); + CHECK(isc_socketmgr_create(mctx, &socketmgr)); + return (ISC_R_SUCCESS); + + cleanup: + cleanup_managers(); + return (result); +} + isc_result_t -isc_test_begin(FILE *logfile) { +isc_test_begin(FILE *logfile, isc_boolean_t start_managers) { isc_result_t result; isc_mem_debugging |= ISC_MEM_DEBUGRECORD; @@ -90,7 +124,8 @@ isc_test_begin(FILE *logfile) { ncpus = 1; #endif - CHECK(isc_taskmgr_create(mctx, ncpus, 0, &taskmgr)); + if (start_managers) + CHECK(create_managers()); return (ISC_R_SUCCESS); @@ -111,6 +146,9 @@ isc_test_end() { } if (ectx != NULL) isc_entropy_detach(&ectx); + + cleanup_managers(); + if (mctx != NULL) isc_mem_destroy(&mctx); } diff --git a/lib/isc/tests/isctest.h b/lib/isc/tests/isctest.h index 36b0852589..243e7feb15 100644 --- a/lib/isc/tests/isctest.h +++ b/lib/isc/tests/isctest.h @@ -14,7 +14,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: isctest.h,v 1.2 2011/07/06 05:05:52 each Exp $ */ +/* $Id: isctest.h,v 1.3 2011/07/28 04:04:37 each Exp $ */ /*! 
\file */ @@ -42,10 +42,12 @@ extern isc_mem_t *mctx; extern isc_entropy_t *ectx; extern isc_log_t *lctx; extern isc_taskmgr_t *taskmgr; +extern isc_timermgr_t *timermgr; +extern isc_socketmgr_t *socketmgr; extern int ncpus; isc_result_t -isc_test_begin(FILE *logfile); +isc_test_begin(FILE *logfile, isc_boolean_t start_managers); void isc_test_end(void); diff --git a/lib/isc/tests/socket_test.c b/lib/isc/tests/socket_test.c new file mode 100644 index 0000000000..3aaaf043e5 --- /dev/null +++ b/lib/isc/tests/socket_test.c @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2011 Internet Systems Consortium, Inc. ("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id: socket_test.c,v 1.2 2011/07/28 04:04:37 each Exp $ */ + +/*! 
\file */ + +#include + +#include + +#include +#include + +#include + +#include "../task_p.h" +#include "isctest.h" + +/* + * Helper functions + */ +typedef struct { + isc_boolean_t done; + isc_result_t result; +} completion_t; + +static void +completion_init(completion_t *completion) { + completion->done = ISC_FALSE; +} + +static void +event_done(isc_task_t *task, isc_event_t *event) { + isc_socketevent_t *dev; + completion_t *completion = event->ev_arg; + + UNUSED(task); + + dev = (isc_socketevent_t *) event; + completion->result = dev->result; + completion->done = ISC_TRUE; + isc_event_free(&event); +} + +static void +nap(isc_uint32_t usec) { +#ifdef HAVE_NANOSLEEP + struct timespec ts; + + ts.tv_sec = usec / 1000000; + ts.tv_nsec = (usec % 1000000) * 1000; + nanosleep(&ts, NULL); +#elif HAVE_USLEEP + usleep(usec); +#else + /* Round up to the nearest second and sleep, instead */ + sleep((usec / 1000000) + 1); +#endif +} + +static isc_result_t +waitfor(completion_t *completion) { + int i = 0; + while (!completion->done && i++ < 5000) { +#ifndef ISC_PLATFORM_USETHREADS + while (isc__taskmgr_ready(taskmgr)) + isc__taskmgr_dispatch(taskmgr); +#endif + nap(1000); + } + if (completion->done) + return (ISC_R_SUCCESS); + return (ISC_R_FAILURE); +} + +/* + * Individual unit tests + */ + +/* Test UDP sendto/recv (IPv4) */ +ATF_TC(udp_sendto); +ATF_TC_HEAD(udp_sendto, tc) { + atf_tc_set_md_var(tc, "descr", "UDP sendto/recv"); +} +ATF_TC_BODY(udp_sendto, tc) { + isc_result_t result; + isc_sockaddr_t addr1, addr2; + struct in_addr in; + isc_socket_t *s1 = NULL, *s2 = NULL; + isc_task_t *task = NULL; + char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; + completion_t completion; + isc_region_t r; + + UNUSED(tc); + + result = isc_test_begin(NULL, ISC_TRUE); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + /* + * Create two sockets: 127.0.0.1/5444 and 127.0.0.1/5445, talking to + * each other. 
+ */ + in.s_addr = inet_addr("127.0.0.1"); + isc_sockaddr_fromin(&addr1, &in, 5444); + isc_sockaddr_fromin(&addr2, &in, 5445); + + result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s1); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + result = isc_socket_bind(s1, &addr1, ISC_SOCKET_REUSEADDRESS); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s2); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + result = isc_socket_bind(s2, &addr2, ISC_SOCKET_REUSEADDRESS); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + result = isc_task_create(taskmgr, 0, &task); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + strcpy(sendbuf, "Hello"); + r.base = (void *) sendbuf; + r.length = strlen(sendbuf) + 1; + + completion_init(&completion); + result = isc_socket_sendto(s1, &r, task, event_done, &completion, + &addr2, NULL); + ATF_CHECK_EQ(result, ISC_R_SUCCESS); + waitfor(&completion); + ATF_CHECK(completion.done); + ATF_CHECK_EQ(completion.result, ISC_R_SUCCESS); + + r.base = (void *) recvbuf; + r.length = BUFSIZ; + completion_init(&completion); + result = isc_socket_recv(s2, &r, 1, task, event_done, &completion); + ATF_CHECK_EQ(result, ISC_R_SUCCESS); + waitfor(&completion); + ATF_CHECK(completion.done); + ATF_CHECK_EQ(completion.result, ISC_R_SUCCESS); + ATF_CHECK_STREQ(recvbuf, "Hello"); + + isc_task_detach(&task); + + isc_socket_detach(&s1); + isc_socket_detach(&s2); + + isc_test_end(); +} + +/* Test UDP sendto/recv with duplicated socket */ +ATF_TC(udp_dup); +ATF_TC_HEAD(udp_dup, tc) { + atf_tc_set_md_var(tc, "descr", "duplicated socket sendto/recv"); +} +ATF_TC_BODY(udp_dup, tc) { + isc_result_t result; + isc_sockaddr_t addr1, addr2; + struct in_addr in; + isc_socket_t *s1 = NULL, *s2 = NULL, *s3 = NULL; + isc_task_t *task = NULL; + char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; + completion_t completion; + isc_region_t r; + + UNUSED(tc); + + result = isc_test_begin(NULL, ISC_TRUE); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + 
+ /* + * Create two sockets: 127.0.0.1/5444 and 127.0.0.1/5445, talking to + * each other. + */ + in.s_addr = inet_addr("127.0.0.1"); + isc_sockaddr_fromin(&addr1, &in, 5444); + isc_sockaddr_fromin(&addr2, &in, 5445); + + result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s1); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + result = isc_socket_bind(s1, &addr1, ISC_SOCKET_REUSEADDRESS); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s2); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + result = isc_socket_dup(s2, &s3); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + result = isc_socket_bind(s2, &addr2, ISC_SOCKET_REUSEADDRESS); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + result = isc_socket_bind(s3, &addr2, ISC_SOCKET_REUSEADDRESS); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + result = isc_task_create(taskmgr, 0, &task); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + strcpy(sendbuf, "Hello"); + r.base = (void *) sendbuf; + r.length = strlen(sendbuf) + 1; + + completion_init(&completion); + result = isc_socket_sendto(s1, &r, task, event_done, &completion, + &addr2, NULL); + ATF_CHECK_EQ(result, ISC_R_SUCCESS); + waitfor(&completion); + ATF_CHECK(completion.done); + ATF_CHECK_EQ(completion.result, ISC_R_SUCCESS); + + strcpy(sendbuf, "World"); + r.base = (void *) sendbuf; + r.length = strlen(sendbuf) + 1; + + completion_init(&completion); + result = isc_socket_sendto(s1, &r, task, event_done, &completion, + &addr2, NULL); + ATF_CHECK_EQ(result, ISC_R_SUCCESS); + waitfor(&completion); + ATF_CHECK(completion.done); + ATF_CHECK_EQ(completion.result, ISC_R_SUCCESS); + + r.base = (void *) recvbuf; + r.length = BUFSIZ; + completion_init(&completion); + result = isc_socket_recv(s2, &r, 1, task, event_done, &completion); + ATF_CHECK_EQ(result, ISC_R_SUCCESS); + waitfor(&completion); + ATF_CHECK(completion.done); + ATF_CHECK_EQ(completion.result, ISC_R_SUCCESS); + ATF_CHECK_STREQ(recvbuf, "Hello"); + + r.base = 
(void *) recvbuf; + r.length = BUFSIZ; + completion_init(&completion); + result = isc_socket_recv(s3, &r, 1, task, event_done, &completion); + ATF_CHECK_EQ(result, ISC_R_SUCCESS); + waitfor(&completion); + ATF_CHECK(completion.done); + ATF_CHECK_EQ(completion.result, ISC_R_SUCCESS); + ATF_CHECK_STREQ(recvbuf, "World"); + + isc_task_detach(&task); + + isc_socket_detach(&s1); + isc_socket_detach(&s2); + isc_socket_detach(&s3); + + isc_test_end(); +} + + +/* + * Main + */ +ATF_TP_ADD_TCS(tp) { + ATF_TP_ADD_TC(tp, udp_sendto); + ATF_TP_ADD_TC(tp, udp_dup); + + return (atf_no_error()); +} + diff --git a/lib/isc/tests/taskpool_test.c b/lib/isc/tests/taskpool_test.c index 63bce41d7c..ab80f452bb 100644 --- a/lib/isc/tests/taskpool_test.c +++ b/lib/isc/tests/taskpool_test.c @@ -14,7 +14,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: taskpool_test.c,v 1.2 2011/07/06 05:05:52 each Exp $ */ +/* $Id: taskpool_test.c,v 1.3 2011/07/28 04:04:37 each Exp $ */ /*! \file */ @@ -44,7 +44,7 @@ ATF_TC_BODY(create_pool, tc) { UNUSED(tc); - result = isc_test_begin(NULL); + result = isc_test_begin(NULL, ISC_TRUE); ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); result = isc_taskpool_create(taskmgr, mctx, 8, 2, &pool); @@ -68,7 +68,7 @@ ATF_TC_BODY(expand_pool, tc) { UNUSED(tc); - result = isc_test_begin(NULL); + result = isc_test_begin(NULL, ISC_TRUE); ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); result = isc_taskpool_create(taskmgr, mctx, 10, 2, &pool1); @@ -121,7 +121,7 @@ ATF_TC_BODY(get_tasks, tc) { UNUSED(tc); - result = isc_test_begin(NULL); + result = isc_test_begin(NULL, ISC_TRUE); ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); result = isc_taskpool_create(taskmgr, mctx, 2, 2, &pool); diff --git a/lib/isc/unix/socket.c b/lib/isc/unix/socket.c index 9b73994d8b..796044d5b2 100644 --- a/lib/isc/unix/socket.c +++ b/lib/isc/unix/socket.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: socket.c,v 1.340 2011/07/21 23:47:53 tbox Exp $ */ +/* $Id: socket.c,v 1.341 2011/07/28 04:04:37 each Exp $ */ /*! \file */ @@ -334,7 +334,8 @@ struct isc__socket { listener : 1, /* listener socket */ connected : 1, connecting : 1, /* connect pending */ - bound : 1; /* bound to local addr */ + bound : 1, /* bound to local addr */ + dupped : 1; #ifdef ISC_NET_RECVOVERFLOW unsigned char overflow; /* used for MSG_TRUNC fake */ @@ -428,6 +429,10 @@ static isc__socketmgr_t *socketmgr = NULL; # define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER) #endif +static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf, + isc_sockettype_t type, + isc_socket_t **socketp, + isc_socket_t *dup_socket); static void send_recvdone_event(isc__socket_t *, isc_socketevent_t **); static void send_senddone_event(isc__socket_t *, isc_socketevent_t **); static void free_socket(isc__socket_t **); @@ -2045,6 +2050,7 @@ allocate_socket(isc__socketmgr_t *manager, isc_sockettype_t type, sock->manager = manager; sock->type = type; sock->fd = -1; + sock->dupped = 0; sock->statsindex = NULL; ISC_LINK_INIT(sock, link); @@ -2221,7 +2227,8 @@ clear_bsdcompat(void) { #endif static isc_result_t -opensocket(isc__socketmgr_t *manager, isc__socket_t *sock) { +opensocket(isc__socketmgr_t *manager, isc__socket_t *sock, + isc__socket_t *dup_socket) { char strbuf[ISC_STRERRORSIZE]; const char *err = "socket"; int tries = 0; @@ -2234,22 +2241,28 @@ opensocket(isc__socketmgr_t *manager, isc__socket_t *sock) { #endif again: - switch (sock->type) { - case isc_sockettype_udp: - sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP); - break; - case isc_sockettype_tcp: - sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); - break; - case isc_sockettype_unix: - sock->fd = socket(sock->pf, SOCK_STREAM, 0); - break; - case isc_sockettype_fdwatch: - /* - * We should not be called for isc_sockettype_fdwatch sockets. 
- */ - INSIST(0); - break; + if (dup_socket == NULL) { + switch (sock->type) { + case isc_sockettype_udp: + sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP); + break; + case isc_sockettype_tcp: + sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); + break; + case isc_sockettype_unix: + sock->fd = socket(sock->pf, SOCK_STREAM, 0); + break; + case isc_sockettype_fdwatch: + /* + * We should not be called for isc_sockettype_fdwatch + * sockets. + */ + INSIST(0); + break; + } + } else { + sock->fd = dup(dup_socket->fd); + sock->dupped = 1; } if (sock->fd == -1 && errno == EINTR && tries++ < 42) goto again; @@ -2326,6 +2339,9 @@ opensocket(isc__socketmgr_t *manager, isc__socket_t *sock) { } } + if (dup_socket != NULL) + goto setup_done; + if (make_nonblock(sock->fd) != ISC_R_SUCCESS) { (void)close(sock->fd); return (ISC_R_UNEXPECTED); @@ -2509,20 +2525,21 @@ opensocket(isc__socketmgr_t *manager, isc__socket_t *sock) { } #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */ +setup_done: inc_stats(manager->stats, sock->statsindex[STATID_OPEN]); return (ISC_R_SUCCESS); } -/*% - * Create a new 'type' socket managed by 'manager'. Events - * will be posted to 'task' and when dispatched 'action' will be - * called with 'arg' as the arg value. The new socket is returned - * in 'socketp'. +/* + * Create a 'type' socket or duplicate an existing socket, managed + * by 'manager'. Events will be posted to 'task' and when dispatched + * 'action' will be called with 'arg' as the arg value. The new + * socket is returned in 'socketp'. 
*/ -ISC_SOCKETFUNC_SCOPE isc_result_t -isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, - isc_socket_t **socketp) +static isc_result_t +socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, + isc_socket_t **socketp, isc_socket_t *dup_socket) { isc__socket_t *sock = NULL; isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; @@ -2554,7 +2571,8 @@ isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, } sock->pf = pf; - result = opensocket(manager, sock); + + result = opensocket(manager, sock, (isc__socket_t *)dup_socket); if (result != ISC_R_SUCCESS) { inc_stats(manager->stats, sock->statsindex[STATID_OPENFAIL]); free_socket(&sock); @@ -2589,11 +2607,40 @@ isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, UNLOCK(&manager->lock); socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_CREATED, "created"); + ISC_MSG_CREATED, dup_socket != NULL ? "dupped" : "created"); return (ISC_R_SUCCESS); } +/*% + * Create a new 'type' socket managed by 'manager'. Events + * will be posted to 'task' and when dispatched 'action' will be + * called with 'arg' as the arg value. The new socket is returned + * in 'socketp'. + */ +ISC_SOCKETFUNC_SCOPE isc_result_t +isc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, + isc_socket_t **socketp) +{ + return (socket_create(manager0, pf, type, socketp, NULL)); +} + +/*% + * Duplicate an existing socket. The new socket is returned + * in 'socketp'. 
+ */ +ISC_SOCKETFUNC_SCOPE isc_result_t +isc__socket_dup(isc_socket_t *sock0, isc_socket_t **socketp) { + isc__socket_t *sock = (isc__socket_t *)sock0; + + REQUIRE(VALID_SOCKET(sock)); + REQUIRE(socketp != NULL && *socketp == NULL); + + return (socket_create((isc_socketmgr_t *) sock->manager, + sock->pf, sock->type, socketp, + sock0)); +} + #ifdef BIND9 ISC_SOCKETFUNC_SCOPE isc_result_t isc__socket_open(isc_socket_t *sock0) { @@ -2612,7 +2659,7 @@ isc__socket_open(isc_socket_t *sock0) { */ REQUIRE(sock->fd == -1); - result = opensocket(sock->manager, sock); + result = opensocket(sock->manager, sock, NULL); if (result != ISC_R_SUCCESS) sock->fd = -1; @@ -2812,6 +2859,7 @@ isc__socket_close(isc_socket_t *sock0) { manager = sock->manager; fd = sock->fd; sock->fd = -1; + sock->dupped = 0; memset(sock->name, 0, sizeof(sock->name)); sock->tag = NULL; sock->listener = 0; @@ -4977,48 +5025,49 @@ isc__socket_bind(isc_socket_t *sock0, isc_sockaddr_t *sockaddr, UNLOCK(&sock->lock); return (ISC_R_FAMILYMISMATCH); } - /* - * Only set SO_REUSEADDR when we want a specific port. - */ + if (!sock->dupped) { + /* + * Only set SO_REUSEADDR when we want a specific port. + */ #ifdef AF_UNIX - if (sock->pf == AF_UNIX) - goto bind_socket; + if (sock->pf == AF_UNIX) + goto bind_socket; #endif - if ((options & ISC_SOCKET_REUSEADDRESS) != 0 && - isc_sockaddr_getport(sockaddr) != (in_port_t)0 && - setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, - sizeof(on)) < 0) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d) %s", sock->fd, - isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed")); - /* Press on... 
*/ - } + if ((options & ISC_SOCKET_REUSEADDRESS) != 0 && + isc_sockaddr_getport(sockaddr) != (in_port_t)0 && + setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, + sizeof(on)) < 0) { + UNEXPECTED_ERROR(__FILE__, __LINE__, + "setsockopt(%d) %s", sock->fd, + isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, "failed")); + /* Press on... */ + } #ifdef AF_UNIX - bind_socket: + bind_socket: #endif - if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) { - inc_stats(sock->manager->stats, - sock->statsindex[STATID_BINDFAIL]); + if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) { + inc_stats(sock->manager->stats, + sock->statsindex[STATID_BINDFAIL]); - UNLOCK(&sock->lock); - switch (errno) { - case EACCES: - return (ISC_R_NOPERM); - case EADDRNOTAVAIL: - return (ISC_R_ADDRNOTAVAIL); - case EADDRINUSE: - return (ISC_R_ADDRINUSE); - case EINVAL: - return (ISC_R_BOUND); - default: - isc__strerror(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s", - strbuf); - return (ISC_R_UNEXPECTED); + UNLOCK(&sock->lock); + switch (errno) { + case EACCES: + return (ISC_R_NOPERM); + case EADDRNOTAVAIL: + return (ISC_R_ADDRNOTAVAIL); + case EADDRINUSE: + return (ISC_R_ADDRINUSE); + case EINVAL: + return (ISC_R_BOUND); + default: + isc__strerror(errno, strbuf, sizeof(strbuf)); + UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s", + strbuf); + return (ISC_R_UNEXPECTED); + } } } - socket_log(sock, sockaddr, TRACE, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound"); sock->bound = 1; @@ -5826,6 +5875,13 @@ isc__socket_register() { } #endif +int +isc_socket_getfd(isc_socket_t *socket0) { + isc__socket_t *socket = (isc__socket_t *)socket0; + + return (socket->fd); +} + #if defined(HAVE_LIBXML2) && defined(BIND9) static const char * diff --git a/lib/isc/win32/libisc.def b/lib/isc/win32/libisc.def index 740f3cefc2..3c20d99a3d 100644 --- a/lib/isc/win32/libisc.def +++ b/lib/isc/win32/libisc.def @@ -88,6 +88,7 @@ 
isc__socket_close isc__socket_connect isc__socket_create isc__socket_detach +isc__socket_dup isc__socket_filter isc__socket_getname isc__socket_getpeername diff --git a/lib/isc/win32/socket.c b/lib/isc/win32/socket.c index bf915f7f0d..4343454a44 100644 --- a/lib/isc/win32/socket.c +++ b/lib/isc/win32/socket.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket.c,v 1.87 2010/12/09 06:08:05 marka Exp $ */ +/* $Id: socket.c,v 1.88 2011/07/28 04:04:37 each Exp $ */ /* This code uses functions which are only available on Server 2003 and * higher, and Windows XP and higher. @@ -265,7 +265,8 @@ struct isc_socket { unsigned int listener : 1, /* listener socket */ connected : 1, pending_connect : 1, /* connect pending */ - bound : 1; /* bound to local addr */ + bound : 1, /* bound to local addr */ + dupped : 1; /* created by isc_socket_dup() */ unsigned int pending_iocp; /* Should equal the counters below. Debug. */ unsigned int pending_recv; /* Number of outstanding recv() calls. */ unsigned int pending_send; /* Number of outstanding send() calls. */ @@ -351,6 +352,10 @@ enum { #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER) #define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER) +static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf, + isc_sockettype_t type, + isc_socket_t **socketp, + isc_socket_t *dup_socket); static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext); static void maybe_free_socket(isc_socket_t **, int); static void free_socket(isc_socket_t **, int); @@ -1461,6 +1466,7 @@ allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, sock->connected = 0; sock->pending_connect = 0; sock->bound = 0; + sock->dupped = 0; memset(sock->name, 0, sizeof(sock->name)); // zero the name field _set_state(sock, SOCK_INITIALIZED); @@ -1623,9 +1629,10 @@ free_socket(isc_socket_t **sockp, int lineno) { * called with 'arg' as the arg value. The new socket is returned * in 'socketp'. 
*/ -isc_result_t -isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, - isc_socket_t **socketp) { +static isc_result_t +socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, + isc_socket_t **socketp, isc_socket_t *dup_socket) +{ isc_socket_t *sock = NULL; isc_result_t result; #if defined(USE_CMSG) @@ -1647,27 +1654,35 @@ isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, return (result); sock->pf = pf; - switch (type) { - case isc_sockettype_udp: - sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP); - if (sock->fd != INVALID_SOCKET) { - result = connection_reset_fix(sock->fd); - if (result != ISC_R_SUCCESS) { - socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, - "closed %d %d %d con_reset_fix_failed", - sock->pending_recv, sock->pending_send, - sock->references); - closesocket(sock->fd); - _set_state(sock, SOCK_CLOSED); - sock->fd = INVALID_SOCKET; - free_socket(&sock, __LINE__); - return (result); + if (dup_socket == NULL) { + switch (type) { + case isc_sockettype_udp: + sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP); + if (sock->fd != INVALID_SOCKET) { + result = connection_reset_fix(sock->fd); + if (result != ISC_R_SUCCESS) { + socket_log(__LINE__, sock, + NULL, EVENT, NULL, 0, 0, + "closed %d %d %d " + "con_reset_fix_failed", + sock->pending_recv, + sock->pending_send, + sock->references); + closesocket(sock->fd); + _set_state(sock, SOCK_CLOSED); + sock->fd = INVALID_SOCKET; + free_socket(&sock, __LINE__); + return (result); + } } + break; + case isc_sockettype_tcp: + sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP); + break; } - break; - case isc_sockettype_tcp: - sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP); - break; + } else { + sock->fd = dup(dup_socket->fd); + sock->dupped = 1; } if (sock->fd == INVALID_SOCKET) { @@ -1786,12 +1801,29 @@ isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, InterlockedIncrement(&manager->totalSockets); UNLOCK(&manager->lock); - 
socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_CREATED, "created %u type %u", sock->fd, type); + socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, + ISC_MSGSET_SOCKET, ISC_MSG_CREATED, + "created %u type %u", sock->fd, type); return (ISC_R_SUCCESS); } +isc_result_t +isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, + isc_socket_t **socketp) +{ + return (socket_create(manager, pf, type, socketp, NULL)); +} + +isc_result_t +isc__socket_dup(isc_socket_t *sock, isc_socket_t **socketp) { + REQUIRE(VALID_SOCKET(sock)); + REQUIRE(socketp != NULL && *socketp == NULL); + + return (socket_create(sock->manager, sock->pf, sock->type, + socketp, sock)); +} + isc_result_t isc_socket_open(isc_socket_t *sock) { REQUIRE(VALID_SOCKET(sock));