From 8c51a32e5c3dbe548b37a5e1ea70f4232bfce2c3 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Sat, 2 Oct 2021 14:52:46 -0700 Subject: [PATCH 1/6] netmgr: add isc_nm_routeconnect() isc_nm_routeconnect() opens a route/netlink socket, then calls a connect callback, much like isc_nm_udpconnect(), with a handle that can then be monitored for network changes. Internally the socket is treated as a UDP socket, since route/netlink sockets follow the datagram contract. --- lib/isc/include/isc/netmgr.h | 12 ++ lib/isc/netmgr/netmgr-int.h | 14 +++ lib/isc/netmgr/netmgr.c | 24 +++- lib/isc/netmgr/udp.c | 228 ++++++++++++++++++++++++++++++++++- 4 files changed, 270 insertions(+), 8 deletions(-) diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 980bff2a14..b517eaaec2 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -232,6 +232,18 @@ isc_nm_udpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, * 'cb'. */ +isc_result_t +isc_nm_routeconnect(isc_nm_t *mgr, isc_nm_cb_t cb, void *cbarg, + size_t extrahandlesize); +/*%< + * Open a route/netlink socket and call 'cb', so the caller can + * begin listening for interface changes. This behaves similarly to + * isc_nm_udpconnect(). + * + * Returns ISC_R_NOTIMPLEMENTED on systems where route/netlink sockets + * are not supported. + */ + void isc_nm_stoplistening(isc_nmsocket_t *sock); /*%< diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index c776aa08b3..2c5305f37b 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -240,6 +240,8 @@ typedef enum isc__netievent_type { netievent_udpread, netievent_udpcancel, + netievent_routeconnect, + netievent_tcpconnect, netievent_tcpclose, netievent_tcpsend, @@ -967,6 +969,8 @@ struct isc_nmsocket { atomic_bool active; atomic_bool destroying; + bool route_sock; + /*% * Socket is closed if it's not active and all the possible * callbacks were fired, there are no active handles, etc. 
@@ -1351,6 +1355,12 @@ isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0); * Callback handlers for asynchronous UDP events (listen, stoplisten, send). */ +void +isc__nm_async_routeconnect(isc__networker_t *worker, isc__netievent_t *ev0); +/*%< + * Callback handler for route socket events. + */ + void isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, isc_nm_cb_t cb, void *cbarg); @@ -1880,6 +1890,8 @@ NETIEVENT_SOCKET_TYPE(tcpstartread); NETIEVENT_SOCKET_REQ_TYPE(tlssend); NETIEVENT_SOCKET_REQ_TYPE(udpconnect); +NETIEVENT_SOCKET_REQ_TYPE(routeconnect); + NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb); NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb); NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb); @@ -1946,6 +1958,8 @@ NETIEVENT_SOCKET_REQ_DECL(tcpsend); NETIEVENT_SOCKET_REQ_DECL(tlssend); NETIEVENT_SOCKET_REQ_DECL(udpconnect); +NETIEVENT_SOCKET_REQ_DECL(routeconnect); + NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb); NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb); NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb); diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 259e097da2..e89bd3d485 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -913,6 +913,8 @@ process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) { NETIEVENT_CASE(udpcancel); NETIEVENT_CASE(udpclose); + NETIEVENT_CASE(routeconnect); + NETIEVENT_CASE(tcpaccept); NETIEVENT_CASE(tcpconnect); NETIEVENT_CASE(tcplisten); @@ -1072,6 +1074,7 @@ NETIEVENT_SOCKET_REQ_DEF(tcpconnect); NETIEVENT_SOCKET_REQ_DEF(tcpsend); NETIEVENT_SOCKET_REQ_DEF(tlssend); NETIEVENT_SOCKET_REQ_DEF(udpconnect); +NETIEVENT_SOCKET_REQ_DEF(routeconnect); NETIEVENT_SOCKET_REQ_RESULT_DEF(connectcb); NETIEVENT_SOCKET_REQ_RESULT_DEF(readcb); NETIEVENT_SOCKET_REQ_RESULT_DEF(sendcb); @@ -1447,12 +1450,8 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, REQUIRE(sock != NULL); REQUIRE(mgr != NULL); - REQUIRE(iface != NULL); - - family = 
iface->type.sa.sa_family; *sock = (isc_nmsocket_t){ .type = type, - .iface = *iface, .fd = -1, .ah_size = 32, .inactivehandles = isc_astack_new( @@ -1460,6 +1459,13 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, .inactivereqs = isc_astack_new( mgr->mctx, ISC_NM_REQS_STACK_SIZE) }; + if (iface != NULL) { + family = iface->type.sa.sa_family; + sock->iface = *iface; + } else { + family = AF_UNSPEC; + } + #if NETMGR_TRACE sock->backtrace_size = isc_backtrace(sock->backtrace, TRACE_SIZE); ISC_LINK_INIT(sock, active_link); @@ -1492,6 +1498,12 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, case AF_INET6: sock->statsindex = udp6statsindex; break; + case AF_UNSPEC: + /* + * Route sockets are AF_UNSPEC, and don't + * have stats counters. + */ + break; default: INSIST(0); ISC_UNREACHABLE(); @@ -1521,6 +1533,10 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, break; } + if (sock->statsindex != NULL) { + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]); + } + isc_mutex_init(&sock->lock); isc_condition_init(&sock->cond); isc_condition_init(&sock->scond); diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 3982d49e7f..e4b4b8926d 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -31,6 +31,32 @@ #include "netmgr-int.h" #include "uv-compat.h" +#ifdef HAVE_NET_ROUTE_H +#include +#if defined(RTM_VERSION) && defined(RTM_NEWADDR) && defined(RTM_DELADDR) +#define USE_ROUTE_SOCKET 1 +#define ROUTE_SOCKET_PF PF_ROUTE +#define ROUTE_SOCKET_PROTOCOL 0 +#define MSGHDR rt_msghdr +#define MSGTYPE rtm_type +#endif /* if defined(RTM_VERSION) && defined(RTM_NEWADDR) && \ + * defined(RTM_DELADDR) */ +#endif /* ifdef HAVE_NET_ROUTE_H */ + +#if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) +#include +#include +#if defined(RTM_NEWADDR) && defined(RTM_DELADDR) +#define USE_ROUTE_SOCKET 1 +#define USE_NETLINK 1 +#define ROUTE_SOCKET_PF 
PF_NETLINK +#define ROUTE_SOCKET_PROTOCOL NETLINK_ROUTE +#define MSGHDR nlmsghdr +#define MSGTYPE nlmsg_type +#endif /* if defined(RTM_NEWADDR) && defined(RTM_DELADDR) */ +#endif /* if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) \ + */ + static isc_result_t udp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, isc_sockaddr_t *peer); @@ -189,6 +215,197 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, return (result); } +#ifdef USE_ROUTE_SOCKET +static isc_result_t +route_socket(uv_os_sock_t *fdp) { + isc_result_t result; + uv_os_sock_t fd; +#ifdef USE_NETLINK + struct sockaddr_nl sa; + int r; +#endif + + result = isc__nm_socket(ROUTE_SOCKET_PF, SOCK_RAW, + ROUTE_SOCKET_PROTOCOL, &fd); + if (result != ISC_R_SUCCESS) { + return (result); + } + +#ifdef USE_NETLINK + sa.nl_family = PF_NETLINK; + sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR; + r = bind(fd, (struct sockaddr *)&sa, sizeof(sa)); + if (r < 0) { + isc__nm_closesocket(fd); + return (isc_errno_toresult(r)); + } +#endif + + *fdp = fd; + return (ISC_R_SUCCESS); +} + +static isc_result_t +route_connect_direct(isc_nmsocket_t *sock) { + isc__networker_t *worker = NULL; + isc_result_t result = ISC_R_UNSET; + int r; + + REQUIRE(isc__nm_in_netthread()); + REQUIRE(sock->tid == isc_nm_tid()); + + worker = &sock->mgr->workers[isc_nm_tid()]; + + atomic_store(&sock->connecting, true); + + r = uv_udp_init(&worker->loop, &sock->uv_handle.udp); + RUNTIME_CHECK(r == 0); + uv_handle_set_data(&sock->uv_handle.handle, sock); + + r = uv_timer_init(&worker->loop, &sock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&sock->timer, sock); + + if (isc__nm_closing(sock)) { + result = ISC_R_SHUTTINGDOWN; + goto error; + } + + r = uv_udp_open(&sock->uv_handle.udp, sock->fd); + if (r != 0) { + goto done; + } + + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + + atomic_store(&sock->connecting, false); + 
atomic_store(&sock->connected, true); + +done: + result = isc__nm_uverr2result(r); +error: + + LOCK(&sock->lock); + sock->result = result; + SIGNAL(&sock->cond); + if (!atomic_load(&sock->active)) { + WAIT(&sock->scond, &sock->lock); + } + INSIST(atomic_load(&sock->active)); + UNLOCK(&sock->lock); + + return (result); +} + +/* + * Asynchronous 'routeconnect' call handler: set up the route socket and + * call the connect callback with a handle. + */ +void +isc__nm_async_routeconnect(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_routeconnect_t *ievent = + (isc__netievent_routeconnect_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + isc__nm_uvreq_t *req = ievent->req; + isc_result_t result; + + UNUSED(worker); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_udpsocket); + REQUIRE(sock->parent == NULL); + REQUIRE(sock->tid == isc_nm_tid()); + + result = route_connect_direct(sock); + if (result != ISC_R_SUCCESS) { + atomic_store(&sock->active, false); + isc__nm_udp_close(sock); + isc__nm_connectcb(sock, req, result, true); + } else { + /* + * The callback has to be called after the socket has been + * initialized + */ + isc__nm_connectcb(sock, req, ISC_R_SUCCESS, true); + } + + /* + * The sock is now attached to the handle. 
+ */ + isc__nmsocket_detach(&sock); +} +#endif /* USE_ROUTE_SOCKET */ + +isc_result_t +isc_nm_routeconnect(isc_nm_t *mgr, isc_nm_cb_t cb, void *cbarg, + size_t extrahandlesize) { +#ifdef USE_ROUTE_SOCKET + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *sock = NULL; + isc__netievent_udpconnect_t *event = NULL; + isc__nm_uvreq_t *req = NULL; + + REQUIRE(VALID_NM(mgr)); + + sock = isc_mem_get(mgr->mctx, sizeof(*sock)); + isc__nmsocket_init(sock, mgr, isc_nm_udpsocket, NULL); + + sock->connect_cb = cb; + sock->connect_cbarg = cbarg; + sock->extrahandlesize = extrahandlesize; + sock->result = ISC_R_UNSET; + atomic_init(&sock->client, true); + sock->route_sock = true; + + req = isc__nm_uvreq_get(mgr, sock); + req->cb.connect = cb; + req->cbarg = cbarg; + req->handle = isc__nmhandle_get(sock, NULL, NULL); + + result = route_socket(&sock->fd); + if (result != ISC_R_SUCCESS) { + if (isc__nm_in_netthread()) { + sock->tid = isc_nm_tid(); + } + isc__nmsocket_clearcb(sock); + isc__nm_connectcb(sock, req, result, true); + atomic_store(&sock->closed, true); + isc__nmsocket_detach(&sock); + return (result); + } + + event = isc__nm_get_netievent_routeconnect(mgr, sock, req); + + if (isc__nm_in_netthread()) { + atomic_store(&sock->active, true); + sock->tid = isc_nm_tid(); + isc__nm_async_routeconnect(&mgr->workers[sock->tid], + (isc__netievent_t *)event); + isc__nm_put_netievent_routeconnect(mgr, event); + } else { + atomic_init(&sock->active, false); + sock->tid = 0; + isc__nm_enqueue_ievent(&mgr->workers[sock->tid], + (isc__netievent_t *)event); + } + LOCK(&sock->lock); + while (sock->result == ISC_R_UNSET) { + WAIT(&sock->cond, &sock->lock); + } + atomic_store(&sock->active, true); + BROADCAST(&sock->scond); + UNLOCK(&sock->lock); + + return (sock->result); +#else /* USE_ROUTE_SOCKET */ + UNUSED(mgr); + UNUSED(cb); + UNUSED(cbarg); + UNUSED(extrahandlesize); + return (ISC_R_NOTIMPLEMENTED); +#endif /* USE_ROUTE_SOCKET */ +} + /* * Asynchronous 'udplisten' call handler: 
start listening on a UDP socket. */ @@ -338,8 +555,8 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, isc__nm_uvreq_t *req = NULL; uint32_t maxudp; bool free_buf; - isc_sockaddr_t sockaddr; isc_result_t result; + isc_sockaddr_t sockaddr, *sa = NULL; REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); @@ -398,10 +615,13 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, goto free; } - result = isc_sockaddr_fromsockaddr(&sockaddr, addr); - RUNTIME_CHECK(result == ISC_R_SUCCESS); + if (!sock->route_sock) { + result = isc_sockaddr_fromsockaddr(&sockaddr, addr); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + sa = &sockaddr; + } - req = isc__nm_get_read_req(sock, &sockaddr); + req = isc__nm_get_read_req(sock, sa); /* * The callback will be called synchronously, because result is From 075139f60e2090637d94ff50c2f084c5fe0fb523 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Sat, 2 Oct 2021 16:26:43 -0700 Subject: [PATCH 2/6] netmgr: refactor isc__nm_incstats() and isc__nm_decstats() route/netlink sockets don't have stats counters associated with them, so it's now necessary to check whether socket stats exist before incrementing or decrementing them. rather than relying on the caller for this, we now just pass the socket and an index, and the correct stats counter will be updated if it exists. 
--- lib/isc/netmgr/netmgr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index e89bd3d485..571b064759 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -1533,10 +1533,6 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, break; } - if (sock->statsindex != NULL) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]); - } - isc_mutex_init(&sock->lock); isc_condition_init(&sock->cond); isc_condition_init(&sock->scond); From 68e8b19ddc09ed38335c65197dc3c18f90e29c3a Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Sat, 2 Oct 2021 17:17:54 -0700 Subject: [PATCH 3/6] rewrite interfacemgr to use netmgr route sockets --- lib/ns/include/ns/events.h | 1 + lib/ns/interfacemgr.c | 189 +++++++++++++++---------------------- 2 files changed, 77 insertions(+), 113 deletions(-) diff --git a/lib/ns/include/ns/events.h b/lib/ns/include/ns/events.h index b8da178b05..4c5b7bba68 100644 --- a/lib/ns/include/ns/events.h +++ b/lib/ns/include/ns/events.h @@ -20,3 +20,4 @@ #define NS_EVENT_CLIENTCONTROL (ISC_EVENTCLASS_NS + 0) #define NS_EVENT_HOOKASYNCDONE (ISC_EVENTCLASS_NS + 1) +#define NS_EVENT_IFSCAN (ISC_EVENTCLASS_NS + 2) diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c index 9673902b61..8e286e23f7 100644 --- a/lib/ns/interfacemgr.c +++ b/lib/ns/interfacemgr.c @@ -33,10 +33,8 @@ #ifdef HAVE_NET_ROUTE_H #include #if defined(RTM_VERSION) && defined(RTM_NEWADDR) && defined(RTM_DELADDR) -#define USE_ROUTE_SOCKET 1 -#define ROUTE_SOCKET_PROTOCOL PF_ROUTE -#define MSGHDR rt_msghdr -#define MSGTYPE rtm_type +#define MSGHDR rt_msghdr +#define MSGTYPE rtm_type #endif /* if defined(RTM_VERSION) && defined(RTM_NEWADDR) && \ * defined(RTM_DELADDR) */ #endif /* ifdef HAVE_NET_ROUTE_H */ @@ -45,10 +43,8 @@ #include #include #if defined(RTM_NEWADDR) && defined(RTM_DELADDR) -#define USE_ROUTE_SOCKET 1 -#define ROUTE_SOCKET_PROTOCOL PF_NETLINK -#define MSGHDR nlmsghdr -#define 
MSGTYPE nlmsg_type +#define MSGHDR nlmsghdr +#define MSGTYPE nlmsg_type #endif /* if defined(RTM_NEWADDR) && defined(RTM_DELADDR) */ #endif /* if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) \ */ @@ -88,11 +84,7 @@ struct ns_interfacemgr { int backlog; /*%< Listen queue size */ atomic_bool shuttingdown; /*%< Interfacemgr shutting down */ ns_clientmgr_t **clientmgrs; /*%< Client managers */ -#ifdef USE_ROUTE_SOCKET - isc_task_t *task; - isc_socket_t *route; - unsigned char buf[2048]; -#endif /* ifdef USE_ROUTE_SOCKET */ + isc_nmhandle_t *route; }; static void @@ -101,35 +93,42 @@ purge_old_interfaces(ns_interfacemgr_t *mgr); static void clearlistenon(ns_interfacemgr_t *mgr); -#ifdef USE_ROUTE_SOCKET static void -route_event(isc_task_t *task, isc_event_t *event) { - isc_socketevent_t *sevent = NULL; - ns_interfacemgr_t *mgr = NULL; - isc_region_t r; - isc_result_t result; - struct MSGHDR *rtm; - bool done = true; +scan_event(isc_task_t *task, isc_event_t *event) { + ns_interfacemgr_t *mgr = (ns_interfacemgr_t *)event->ev_arg; UNUSED(task); - REQUIRE(event->ev_type == ISC_SOCKEVENT_RECVDONE); - mgr = event->ev_arg; - sevent = (isc_socketevent_t *)event; + ns_interfacemgr_scan(mgr, false); + isc_event_free(&event); +} - if (sevent->result != ISC_R_SUCCESS) { - if (sevent->result != ISC_R_CANCELED) { - isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, - "automatic interface scanning " - "terminated: %s", - isc_result_totext(sevent->result)); - } - ns_interfacemgr_detach(&mgr); - isc_event_free(&event); +static void +route_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, + void *arg) { + ns_interfacemgr_t *mgr = (ns_interfacemgr_t *)arg; + struct MSGHDR *rtm = NULL; + bool done = true; + + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_DEBUG(3), "route_recv: %s", + isc_result_totext(eresult)); + + if (handle == NULL) { return; } - rtm = (struct MSGHDR *)mgr->buf; + if (eresult != ISC_R_SUCCESS) { + if (eresult != ISC_R_CANCELED) 
{ + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, + "automatic interface scanning " + "terminated: %s", + isc_result_totext(eresult)); + } + isc_nmhandle_detach(&mgr->route); + return; + } + + rtm = (struct MSGHDR *)region->base; #ifdef RTM_VERSION if (rtm->rtm_version != RTM_VERSION) { isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, @@ -137,8 +136,7 @@ route_event(isc_task_t *task, isc_event_t *event) { "rtm->rtm_version mismatch (%u != %u) " "recompile required", rtm->rtm_version, RTM_VERSION); - ns_interfacemgr_detach(&mgr); - isc_event_free(&event); + isc_nmhandle_detach(&mgr->route); return; } #endif /* ifdef RTM_VERSION */ @@ -147,7 +145,11 @@ route_event(isc_task_t *task, isc_event_t *event) { case RTM_NEWADDR: case RTM_DELADDR: if (mgr->route != NULL && mgr->sctx->interface_auto) { - ns_interfacemgr_scan(mgr, false); + isc_event_t *event = NULL; + event = isc_event_allocate(mgr->mctx, mgr, + NS_EVENT_IFSCAN, scan_event, + mgr, sizeof(*event)); + isc_task_send(mgr->excl, &event); } break; default: @@ -156,26 +158,33 @@ route_event(isc_task_t *task, isc_event_t *event) { LOCK(&mgr->lock); if (mgr->route != NULL) { - /* - * Look for next route event. 
- */ - r.base = mgr->buf; - r.length = sizeof(mgr->buf); - result = isc_socket_recv(mgr->route, &r, 1, mgr->task, - route_event, mgr); - if (result == ISC_R_SUCCESS) { - done = false; - } + isc_nm_read(handle, route_recv, mgr); + done = false; } UNLOCK(&mgr->lock); if (done) { - ns_interfacemgr_detach(&mgr); + isc_nmhandle_detach(&mgr->route); } - isc_event_free(&event); return; } -#endif /* ifdef USE_ROUTE_SOCKET */ + +static void +route_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { + ns_interfacemgr_t *mgr = (ns_interfacemgr_t *)arg; + + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_DEBUG(3), + "route_connected: %s", isc_result_totext(eresult)); + + if (eresult != ISC_R_SUCCESS) { + return; + } + + INSIST(mgr->route == NULL); + + isc_nmhandle_attach(handle, &mgr->route); + isc_nm_read(handle, route_recv, mgr); +} isc_result_t ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, @@ -185,41 +194,33 @@ ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, dns_geoip_databases_t *geoip, int ncpus, ns_interfacemgr_t **mgrp) { isc_result_t result; - ns_interfacemgr_t *mgr; + ns_interfacemgr_t *mgr = NULL; -#ifndef USE_ROUTE_SOCKET UNUSED(task); -#endif /* ifndef USE_ROUTE_SOCKET */ REQUIRE(mctx != NULL); REQUIRE(mgrp != NULL); REQUIRE(*mgrp == NULL); mgr = isc_mem_get(mctx, sizeof(*mgr)); + *mgr = (ns_interfacemgr_t){ .taskmgr = taskmgr, + .timermgr = timermgr, + .socketmgr = socketmgr, + .nm = nm, + .dispatchmgr = dispatchmgr, + .generation = 1, + .ncpus = ncpus }; - mgr->mctx = NULL; isc_mem_attach(mctx, &mgr->mctx); - - mgr->sctx = NULL; ns_server_attach(sctx, &mgr->sctx); isc_mutex_init(&mgr->lock); - mgr->excl = NULL; result = isc_taskmgr_excltask(taskmgr, &mgr->excl); if (result != ISC_R_SUCCESS) { goto cleanup_lock; } - mgr->taskmgr = taskmgr; - mgr->timermgr = timermgr; - mgr->socketmgr = socketmgr; - mgr->nm = nm; - mgr->dispatchmgr = dispatchmgr; - mgr->generation = 1; - mgr->listenon4 = NULL; - mgr->listenon6 = NULL; - 
mgr->ncpus = ncpus; atomic_init(&mgr->shuttingdown, false); ISC_LIST_INIT(mgr->interfaces); @@ -244,28 +245,14 @@ ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, UNUSED(geoip); #endif /* if defined(HAVE_GEOIP2) */ -#ifdef USE_ROUTE_SOCKET - mgr->route = NULL; - result = isc_socket_create(mgr->socketmgr, ROUTE_SOCKET_PROTOCOL, - isc_sockettype_raw, &mgr->route); - switch (result) { - case ISC_R_NOPERM: - case ISC_R_SUCCESS: - case ISC_R_NOTIMPLEMENTED: - case ISC_R_FAMILYNOSUPPORT: - break; - default: - goto cleanup_aclenv; + result = isc_nm_routeconnect(nm, route_connected, mgr, 0); + if (result != ISC_R_SUCCESS) { + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_INFO, + "unable to open route socket: %s", + isc_result_totext(result)); } - mgr->task = NULL; - if (mgr->route != NULL) { - isc_task_attach(task, &mgr->task); - } - isc_refcount_init(&mgr->references, (mgr->route != NULL) ? 2 : 1); -#else /* ifdef USE_ROUTE_SOCKET */ isc_refcount_init(&mgr->references, 1); -#endif /* ifdef USE_ROUTE_SOCKET */ mgr->magic = IFMGR_MAGIC; *mgrp = mgr; @@ -278,25 +265,8 @@ ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, RUNTIME_CHECK(result == ISC_R_SUCCESS); } -#ifdef USE_ROUTE_SOCKET - if (mgr->route != NULL) { - isc_region_t r = { mgr->buf, sizeof(mgr->buf) }; - - result = isc_socket_recv(mgr->route, &r, 1, mgr->task, - route_event, mgr); - if (result != ISC_R_SUCCESS) { - isc_task_detach(&mgr->task); - isc_socket_detach(&mgr->route); - ns_interfacemgr_detach(&mgr); - } - } -#endif /* ifdef USE_ROUTE_SOCKET */ return (ISC_R_SUCCESS); -#ifdef USE_ROUTE_SOCKET -cleanup_aclenv: - dns_aclenv_detach(&mgr->aclenv); -#endif /* ifdef USE_ROUTE_SOCKET */ cleanup_listenon: ns_listenlist_detach(&mgr->listenon4); ns_listenlist_detach(&mgr->listenon6); @@ -314,14 +284,9 @@ ns_interfacemgr_destroy(ns_interfacemgr_t *mgr) { isc_refcount_destroy(&mgr->references); -#ifdef USE_ROUTE_SOCKET if (mgr->route != NULL) { - isc_socket_detach(&mgr->route); + 
isc_nmhandle_detach(&mgr->route); } - if (mgr->task != NULL) { - isc_task_detach(&mgr->task); - } -#endif /* ifdef USE_ROUTE_SOCKET */ dns_aclenv_detach(&mgr->aclenv); ns_listenlist_detach(&mgr->listenon4); ns_listenlist_detach(&mgr->listenon6); @@ -387,15 +352,13 @@ ns_interfacemgr_shutdown(ns_interfacemgr_t *mgr) { */ mgr->generation++; atomic_store(&mgr->shuttingdown, true); -#ifdef USE_ROUTE_SOCKET + LOCK(&mgr->lock); if (mgr->route != NULL) { - isc_socket_cancel(mgr->route, mgr->task, ISC_SOCKCANCEL_RECV); - isc_socket_detach(&mgr->route); - isc_task_detach(&mgr->task); + isc_nmhandle_detach(&mgr->route); } UNLOCK(&mgr->lock); -#endif /* ifdef USE_ROUTE_SOCKET */ + purge_old_interfaces(mgr); } From a55589f881bc4e4c1099e50b6d4ce84ffc7b5ba3 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Sun, 3 Oct 2021 00:27:52 -0700 Subject: [PATCH 4/6] remove all references to isc_socket and related types Removed socket.c, socket.h, and all references to isc_socket_t, isc_socketmgr_t, isc_sockevent_t, etc. 
--- bin/check/named-checkzone.c | 1 - bin/delv/delv.c | 5 +- bin/dig/dig.c | 1 + bin/dig/dighost.c | 4 +- bin/dig/dighost.h | 2 +- bin/dnssec/dnssec-signzone.c | 4 +- bin/named/bind9.xsl | 51 +- bin/named/include/named/globals.h | 31 +- bin/named/main.c | 20 +- bin/named/named.conf.rst | 2 +- bin/named/named.rst | 6 +- bin/named/server.c | 61 +- bin/named/statschannel.c | 20 - bin/nsupdate/nsupdate.c | 5 +- bin/rndc/rndc.c | 5 +- bin/tests/system/pipelined/pipequeries.c | 4 +- bin/tests/system/resolve.c | 6 +- bin/tests/system/statistics/tests.sh | 1 - bin/tests/system/tkey/keycreate.c | 4 +- bin/tests/system/tkey/keydelete.c | 4 +- bin/tests/test_client.c | 4 +- bin/tests/test_server.c | 4 +- bin/tools/mdig.c | 4 +- doc/arm/reference.rst | 8 +- doc/dev/dev.md | 26 - doc/man/named.8in | 6 +- doc/man/named.conf.5in | 2 +- doc/misc/options | 2 +- doc/misc/options.active | 2 +- doc/misc/options.grammar.rst | 2 +- lib/dns/client.c | 1 - lib/dns/include/dns/dispatch.h | 6 +- lib/dns/include/dns/resolver.h | 2 +- lib/dns/rbt.c | 1 - lib/dns/rbtdb.c | 1 - lib/dns/request.c | 1 - lib/dns/resolver.c | 3 +- lib/dns/tests/dispatch_test.c | 6 +- lib/dns/tests/dnstest.c | 6 +- lib/dns/tests/rbt_test.c | 1 - lib/dns/tests/resolver_test.c | 2 +- lib/isc/Makefile.am | 3 - lib/isc/httpd.c | 2 +- lib/isc/include/isc/managers.h | 8 +- lib/isc/include/isc/socket.h | 842 ---- lib/isc/include/isc/types.h | 11 +- lib/isc/managers.c | 28 +- lib/isc/netmgr/http.c | 1 + lib/isc/netmgr/netmgr.c | 7 + lib/isc/socket.c | 5445 ---------------------- lib/isc/socket_p.h | 79 - lib/isc/tests/Makefile.am | 2 - lib/isc/tests/isctest.c | 9 +- lib/isc/tests/isctest.h | 1 - lib/isc/tests/socket_test.c | 734 --- lib/isc/tests/task_test.c | 4 +- lib/isccfg/namedconf.c | 2 +- lib/ns/include/ns/interfacemgr.h | 7 +- lib/ns/interfacemgr.c | 23 +- lib/ns/tests/nstest.c | 12 +- lib/ns/tests/nstest.h | 1 - util/copyrights | 4 - 62 files changed, 114 insertions(+), 7438 deletions(-) delete mode 100644 
lib/isc/include/isc/socket.h delete mode 100644 lib/isc/socket.c delete mode 100644 lib/isc/socket_p.h delete mode 100644 lib/isc/tests/socket_test.c diff --git a/bin/check/named-checkzone.c b/bin/check/named-checkzone.c index ec7a69db89..86c4fe00f7 100644 --- a/bin/check/named-checkzone.c +++ b/bin/check/named-checkzone.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/bin/delv/delv.c b/bin/delv/delv.c index 64ab63ba24..9adb5cd26c 100644 --- a/bin/delv/delv.c +++ b/bin/delv/delv.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -1742,7 +1741,7 @@ main(int argc, char *argv[]) { CHECK(isc_appctx_create(mctx, &actx)); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, &timermgr, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, &timermgr); parse_args(argc, argv); @@ -1844,7 +1843,7 @@ cleanup: dns_client_detach(&client); } - isc_managers_destroy(&netmgr, &taskmgr, &timermgr, NULL); + isc_managers_destroy(&netmgr, &taskmgr, &timermgr); if (actx != NULL) { isc_appctx_destroy(&actx); diff --git a/bin/dig/dig.c b/bin/dig/dig.c index 2de1728a61..ab34d9741a 100644 --- a/bin/dig/dig.c +++ b/bin/dig/dig.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/bin/dig/dighost.c b/bin/dig/dighost.c index fc45fab332..62fc949b39 100644 --- a/bin/dig/dighost.c +++ b/bin/dig/dighost.c @@ -1356,7 +1356,7 @@ setup_libs(void) { isc_log_setdebuglevel(lctx, 0); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); result = isc_task_create(taskmgr, 0, &global_task); check_result(result, "isc_task_create"); @@ -4248,7 +4248,7 @@ destroy_libs(void) { isc_task_detach(&global_task); } - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); LOCK_LOOKUP; isc_refcount_destroy(&recvcount); diff --git a/bin/dig/dighost.h b/bin/dig/dighost.h index 
b808b92c70..80b3d106cf 100644 --- a/bin/dig/dighost.h +++ b/bin/dig/dighost.h @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include diff --git a/bin/dnssec/dnssec-signzone.c b/bin/dnssec/dnssec-signzone.c index bca4a36456..2e0d5e893c 100644 --- a/bin/dnssec/dnssec-signzone.c +++ b/bin/dnssec/dnssec-signzone.c @@ -3990,7 +3990,7 @@ main(int argc, char *argv[]) { print_time(outfp); print_version(outfp); - isc_managers_create(mctx, ntasks, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, ntasks, 0, &netmgr, &taskmgr, NULL); main_task = NULL; result = isc_task_create(taskmgr, 0, &main_task); @@ -4041,7 +4041,7 @@ main(int argc, char *argv[]) { for (i = 0; i < (int)ntasks; i++) { isc_task_detach(&tasks[i]); } - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); isc_mem_put(mctx, tasks, ntasks * sizeof(isc_task_t *)); postsign(); TIME_NOW(&sign_finish); diff --git a/bin/named/bind9.xsl b/bin/named/bind9.xsl index 5078115f27..59bca67470 100644 --- a/bin/named/bind9.xsl +++ b/bin/named/bind9.xsl @@ -11,7 +11,7 @@ --> - + @@ -928,55 +928,6 @@ - -

Network Status

- - - - - - - - - - - - - - - even - odd - - - - - - - - - - - - -
IDNameTypeReferencesLocalAddressPeerAddressState
- - - - - - - - - - - - - - - -
-
-

Task Manager Configuration

diff --git a/bin/named/include/named/globals.h b/bin/named/include/named/globals.h index b7798265db..83f5e4839f 100644 --- a/bin/named/include/named/globals.h +++ b/bin/named/include/named/globals.h @@ -59,22 +59,21 @@ EXTERN bool named_g_run_done INIT(false); * for really short timers, another for client timers, and one * for zone timers. */ -EXTERN isc_timermgr_t *named_g_timermgr INIT(NULL); -EXTERN isc_socketmgr_t *named_g_socketmgr INIT(NULL); -EXTERN isc_nm_t *named_g_netmgr INIT(NULL); -EXTERN cfg_parser_t *named_g_parser INIT(NULL); -EXTERN cfg_parser_t *named_g_addparser INIT(NULL); -EXTERN const char *named_g_version INIT(PACKAGE_VERSION); -EXTERN const char *named_g_product INIT(PACKAGE_NAME); -EXTERN const char *named_g_description INIT(PACKAGE_DESCRIPTION); -EXTERN const char *named_g_srcid INIT(PACKAGE_SRCID); -EXTERN const char *named_g_configargs INIT(PACKAGE_CONFIGARGS); -EXTERN const char *named_g_builder INIT(PACKAGE_BUILDER); -EXTERN in_port_t named_g_port INIT(0); -EXTERN in_port_t named_g_tlsport INIT(0); -EXTERN in_port_t named_g_httpsport INIT(0); -EXTERN in_port_t named_g_httpport INIT(0); -EXTERN isc_dscp_t named_g_dscp INIT(-1); +EXTERN isc_timermgr_t *named_g_timermgr INIT(NULL); +EXTERN isc_nm_t *named_g_netmgr INIT(NULL); +EXTERN cfg_parser_t *named_g_parser INIT(NULL); +EXTERN cfg_parser_t *named_g_addparser INIT(NULL); +EXTERN const char *named_g_version INIT(PACKAGE_VERSION); +EXTERN const char *named_g_product INIT(PACKAGE_NAME); +EXTERN const char *named_g_description INIT(PACKAGE_DESCRIPTION); +EXTERN const char *named_g_srcid INIT(PACKAGE_SRCID); +EXTERN const char *named_g_configargs INIT(PACKAGE_CONFIGARGS); +EXTERN const char *named_g_builder INIT(PACKAGE_BUILDER); +EXTERN in_port_t named_g_port INIT(0); +EXTERN in_port_t named_g_tlsport INIT(0); +EXTERN in_port_t named_g_httpsport INIT(0); +EXTERN in_port_t named_g_httpport INIT(0); +EXTERN isc_dscp_t named_g_dscp INIT(-1); EXTERN in_port_t named_g_http_listener_clients 
INIT(0); EXTERN in_port_t named_g_http_streams_per_conn INIT(0); diff --git a/bin/named/main.c b/bin/named/main.c index f0bf372def..f2f200a3f4 100644 --- a/bin/named/main.c +++ b/bin/named/main.c @@ -120,7 +120,6 @@ static char absolute_conffile[PATH_MAX]; static char saved_command_line[4096] = { 0 }; static char ellipsis[5] = { 0 }; static char version[512]; -static unsigned int maxsocks = 0; static int maxudp = 0; /* @@ -824,8 +823,7 @@ parse_command_line(int argc, char *argv[]) { want_stats = true; break; case 'S': - maxsocks = parse_int(isc_commandline_argument, - "max number of sockets"); + /* Formerly maxsocks */ break; case 't': /* XXXJAB should we make a copy? */ @@ -897,7 +895,6 @@ parse_command_line(int argc, char *argv[]) { static isc_result_t create_managers(void) { isc_result_t result; - unsigned int socks; INSIST(named_g_cpus_detected > 0); @@ -921,30 +918,21 @@ create_managers(void) { named_g_udpdisp == 1 ? "" : "s"); result = isc_managers_create(named_g_mctx, named_g_cpus, - 0 /* quantum */, maxsocks, &named_g_netmgr, - &named_g_taskmgr, &named_g_timermgr, - &named_g_socketmgr); + 0 /* quantum */, &named_g_netmgr, + &named_g_taskmgr, &named_g_timermgr); if (result != ISC_R_SUCCESS) { return (result); } - isc_socketmgr_maxudp(named_g_socketmgr, maxudp); isc_nm_maxudp(named_g_netmgr, maxudp); - result = isc_socketmgr_getmaxsockets(named_g_socketmgr, &socks); - if (result == ISC_R_SUCCESS) { - isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, - NAMED_LOGMODULE_SERVER, ISC_LOG_INFO, - "using up to %u sockets", socks); - } - return (ISC_R_SUCCESS); } static void destroy_managers(void) { isc_managers_destroy(&named_g_netmgr, &named_g_taskmgr, - &named_g_timermgr, &named_g_socketmgr); + &named_g_timermgr); } static void diff --git a/bin/named/named.conf.rst b/bin/named/named.conf.rst index 3793cfe123..e04891ab2a 100644 --- a/bin/named/named.conf.rst +++ b/bin/named/named.conf.rst @@ -391,7 +391,7 @@ OPTIONS request-ixfr boolean; request-nsid boolean; 
require-server-cookie boolean; - reserved-sockets integer; + reserved-sockets integer;// deprecated resolver-nonbackoff-tries integer; resolver-query-timeout integer; resolver-retry-interval integer; diff --git a/bin/named/named.rst b/bin/named/named.rst index cd7becc840..4fb58f1a76 100644 --- a/bin/named/named.rst +++ b/bin/named/named.rst @@ -29,7 +29,7 @@ named - Internet domain name server Synopsis ~~~~~~~~ -:program:`named` [ [**-4**] | [**-6**] ] [**-c** config-file] [**-d** debug-level] [**-D** string] [**-E** engine-name] [**-f**] [**-g**] [**-L** logfile] [**-M** option] [**-m** flag] [**-n** #cpus] [**-p** port] [**-s**] [**-S** #max-socks] [**-t** directory] [**-U** #listeners] [**-u** user] [**-v**] [**-V**] [**-X** lock-file] +:program:`named` [ [**-4**] | [**-6**] ] [**-c** config-file] [**-d** debug-level] [**-D** string] [**-E** engine-name] [**-f**] [**-g**] [**-L** logfile] [**-M** option] [**-m** flag] [**-n** #cpus] [**-p** port] [**-s**] [**-t** directory] [**-U** #listeners] [**-u** user] [**-v**] [**-V**] [**-X** lock-file] Description ~~~~~~~~~~~ @@ -126,9 +126,7 @@ Options removed or changed in a future release. ``-S #max-socks`` - This option allows ``named`` to use up to ``#max-socks`` sockets. The default value is - 21000 on systems built with default configuration options, and 4096 - on systems built with ``configure --with-tuning=small``. + This option is deprecated and no longer has any function. .. 
warning:: diff --git a/bin/named/server.c b/bin/named/server.c index c8e7232da4..e3cd057bb8 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -8348,11 +8347,9 @@ load_configuration(const char *filename, named_server_t *server, isc_logconfig_t *logc = NULL; isc_portset_t *v4portset = NULL; isc_portset_t *v6portset = NULL; - isc_resourcevalue_t nfiles; isc_result_t result, tresult; uint32_t heartbeat_interval; uint32_t interface_interval; - uint32_t reserved; uint32_t udpsize; uint32_t transfer_message_size; uint32_t recv_tcp_buffer_size; @@ -8363,7 +8360,6 @@ load_configuration(const char *filename, named_server_t *server, named_cachelist_t cachelist, tmpcachelist; ns_altsecret_t *altsecret; ns_altsecretlist_t altsecrets, tmpaltsecrets; - unsigned int maxsocks; uint32_t softquota = 0; uint32_t max; uint64_t initial, idle, keepalive, advertised; @@ -8516,52 +8512,6 @@ load_configuration(const char *filename, named_server_t *server, */ CHECK(check_lockfile(server, config, first_time)); - /* - * Check if max number of open sockets that the system allows is - * sufficiently large. Failing this condition is not necessarily fatal, - * but may cause subsequent runtime failures for a busy recursive - * server. - */ - result = isc_socketmgr_getmaxsockets(named_g_socketmgr, &maxsocks); - if (result != ISC_R_SUCCESS) { - maxsocks = 0; - } - result = isc_resource_getcurlimit(isc_resource_openfiles, &nfiles); - if (result == ISC_R_SUCCESS && (isc_resourcevalue_t)maxsocks > nfiles) { - isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, - NAMED_LOGMODULE_SERVER, ISC_LOG_WARNING, - "max open files (%" PRIu64 ")" - " is smaller than max sockets (%u)", - nfiles, maxsocks); - } - - /* - * Set the number of socket reserved for TCP, stdio etc. 
- */ - obj = NULL; - result = named_config_get(maps, "reserved-sockets", &obj); - INSIST(result == ISC_R_SUCCESS); - reserved = cfg_obj_asuint32(obj); - if (maxsocks != 0) { - if (maxsocks < 128U) { /* Prevent underflow. */ - reserved = 0; - } else if (reserved > maxsocks - 128U) { /* Minimum UDP space. - */ - reserved = maxsocks - 128; - } - } - /* Minimum TCP/stdio space. */ - if (reserved < 128U) { - reserved = 128; - } - if (reserved + 128U > maxsocks && maxsocks != 0) { - isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, - NAMED_LOGMODULE_SERVER, ISC_LOG_WARNING, - "less than 128 UDP sockets available after " - "applying 'reserved-sockets' and 'maxsockets'"); - } - isc_socketmgr_setreserved(named_g_socketmgr, reserved); - #if defined(HAVE_GEOIP2) /* * Release any previously opened GeoIP2 databases. @@ -9871,11 +9821,11 @@ run_server(isc_task_t *task, isc_event_t *event) { geoip = NULL; #endif /* if defined(HAVE_GEOIP2) */ - CHECKFATAL(ns_interfacemgr_create( - named_g_mctx, server->sctx, named_g_taskmgr, - named_g_timermgr, named_g_socketmgr, named_g_netmgr, - named_g_dispatchmgr, server->task, geoip, - named_g_cpus, &server->interfacemgr), + CHECKFATAL(ns_interfacemgr_create(named_g_mctx, server->sctx, + named_g_taskmgr, named_g_timermgr, + named_g_netmgr, named_g_dispatchmgr, + server->task, geoip, named_g_cpus, + &server->interfacemgr), "creating interface manager"); CHECKFATAL(isc_timer_create(named_g_timermgr, isc_timertype_inactive, @@ -10182,7 +10132,6 @@ named_server_create(isc_mem_t *mctx, named_server_t **serverp) { CHECKFATAL(isc_stats_create(server->mctx, &server->sockstats, isc_sockstatscounter_max), "isc_stats_create"); - isc_socketmgr_setstats(named_g_socketmgr, server->sockstats); isc_nm_setstats(named_g_netmgr, server->sockstats); CHECKFATAL(isc_stats_create(named_g_mctx, &server->zonestats, diff --git a/bin/named/statschannel.c b/bin/named/statschannel.c index c9685d8e19..dcd1a3e231 100644 --- a/bin/named/statschannel.c +++ 
b/bin/named/statschannel.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -2313,13 +2312,6 @@ generatexml(named_server_t *server, uint32_t flags, int *buflen, } TRY0(xmlTextWriterEndElement(writer)); /* /views */ - if ((flags & STATS_XML_NET) != 0) { - TRY0(xmlTextWriterStartElement(writer, - ISC_XMLCHAR "socketmgr")); - TRY0(isc_socketmgr_renderxml(named_g_socketmgr, writer)); - TRY0(xmlTextWriterEndElement(writer)); /* /socketmgr */ - } - if ((flags & STATS_XML_TASKS) != 0) { TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "taskmgr")); TRY0(isc_taskmgr_renderxml(named_g_taskmgr, writer)); @@ -3103,7 +3095,6 @@ generatejson(named_server_t *server, size_t *msglen, const char **msg, if ((flags & STATS_JSON_NET) != 0) { /* socket stat counters */ - json_object *sockets; counters = json_object_new_object(); dumparg.result = ISC_R_SUCCESS; @@ -3124,17 +3115,6 @@ generatejson(named_server_t *server, size_t *msglen, const char **msg, } else { json_object_put(counters); } - - sockets = json_object_new_object(); - CHECKMEM(sockets); - - result = isc_socketmgr_renderjson(named_g_socketmgr, sockets); - if (result != ISC_R_SUCCESS) { - json_object_put(sockets); - goto cleanup; - } - - json_object_object_add(bindstats, "socketmgr", sockets); } if ((flags & STATS_JSON_TASKS) != 0) { diff --git a/bin/nsupdate/nsupdate.c b/bin/nsupdate/nsupdate.c index ab2f1b07f4..5202c5fab2 100644 --- a/bin/nsupdate/nsupdate.c +++ b/bin/nsupdate/nsupdate.c @@ -913,8 +913,7 @@ setup_system(void) { irs_resconf_destroy(&resconf); - result = isc_managers_create(gmctx, 1, 0, 0, &netmgr, &taskmgr, NULL, - NULL); + result = isc_managers_create(gmctx, 1, 0, &netmgr, &taskmgr, NULL); check_result(result, "isc_managers_create"); result = dns_dispatchmgr_create(gmctx, netmgr, &dispatchmgr); @@ -3320,7 +3319,7 @@ cleanup(void) { } ddebug("Shutting down managers"); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); 
ddebug("Destroying event"); isc_event_free(&global_event); diff --git a/bin/rndc/rndc.c b/bin/rndc/rndc.c index f22c9fedf0..ba5ad175f0 100644 --- a/bin/rndc/rndc.c +++ b/bin/rndc/rndc.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -1029,7 +1028,7 @@ main(int argc, char **argv) { serial = isc_random32(); isc_mem_create(&rndc_mctx); - isc_managers_create(rndc_mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(rndc_mctx, 1, 0, &netmgr, &taskmgr, NULL); DO("create task", isc_task_create(taskmgr, 0, &rndc_task)); isc_log_create(rndc_mctx, &log, &logconfig); isc_log_setcontext(log); @@ -1084,7 +1083,7 @@ main(int argc, char **argv) { } isc_task_detach(&rndc_task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); /* * Note: when TCP connections are shut down, there will be a final diff --git a/bin/tests/system/pipelined/pipequeries.c b/bin/tests/system/pipelined/pipequeries.c index 3ee5fade07..78965bf975 100644 --- a/bin/tests/system/pipelined/pipequeries.c +++ b/bin/tests/system/pipelined/pipequeries.c @@ -262,7 +262,7 @@ main(int argc, char *argv[]) { RUNCHECK(dst_lib_init(mctx, NULL)); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); RUNCHECK(isc_task_create(taskmgr, 0, &task)); RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); @@ -287,7 +287,7 @@ main(int argc, char *argv[]) { isc_task_shutdown(task); isc_task_detach(&task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); dst_lib_destroy(); diff --git a/bin/tests/system/resolve.c b/bin/tests/system/resolve.c index 1735f1cc6e..2cc9668368 100644 --- a/bin/tests/system/resolve.c +++ b/bin/tests/system/resolve.c @@ -61,7 +61,7 @@ isc_timermgr_t *ctxs_timermgr = NULL; static void ctxs_destroy(void) { - isc_managers_destroy(&ctxs_netmgr, &ctxs_taskmgr, &ctxs_timermgr, 
NULL); + isc_managers_destroy(&ctxs_netmgr, &ctxs_taskmgr, &ctxs_timermgr); if (ctxs_actx != NULL) { isc_appctx_destroy(&ctxs_actx); @@ -83,8 +83,8 @@ ctxs_init(void) { goto fail; } - isc_managers_create(ctxs_mctx, 1, 0, 0, &ctxs_netmgr, &ctxs_taskmgr, - &ctxs_timermgr, NULL); + isc_managers_create(ctxs_mctx, 1, 0, &ctxs_netmgr, &ctxs_taskmgr, + &ctxs_timermgr); result = isc_app_ctxstart(ctxs_actx); if (result != ISC_R_SUCCESS) { diff --git a/bin/tests/system/statistics/tests.sh b/bin/tests/system/statistics/tests.sh index b019dfac69..8db7741013 100644 --- a/bin/tests/system/statistics/tests.sh +++ b/bin/tests/system/statistics/tests.sh @@ -201,7 +201,6 @@ if $FEATURETEST --have-libxml2 && [ -x "${CURL}" ] && [ -x "${XSLTPROC}" ] ; th # grep "

Glue cache statistics

" xsltproc.out.${n} >/dev/null || ret=1 grep "

View _default" xsltproc.out.${n} >/dev/null || ret=1 grep "

Zone example" xsltproc.out.${n} >/dev/null || ret=1 - grep "

Network Status

" xsltproc.out.${n} >/dev/null || ret=1 grep "

Task Manager Configuration

" xsltproc.out.${n} >/dev/null || ret=1 grep "

Tasks

" xsltproc.out.${n} >/dev/null || ret=1 grep "

Memory Usage Summary

" xsltproc.out.${n} >/dev/null || ret=1 diff --git a/bin/tests/system/tkey/keycreate.c b/bin/tests/system/tkey/keycreate.c index 085648c178..cfd5a9aeac 100644 --- a/bin/tests/system/tkey/keycreate.c +++ b/bin/tests/system/tkey/keycreate.c @@ -219,7 +219,7 @@ main(int argc, char *argv[]) { RUNCHECK(dst_lib_init(mctx, NULL)); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); RUNCHECK(isc_task_create(taskmgr, 0, &task)); RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); @@ -254,7 +254,7 @@ main(int argc, char *argv[]) { dns_dispatchmgr_detach(&dispatchmgr); isc_task_shutdown(task); isc_task_detach(&task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); dst_key_free(&ourkey); dns_tsigkey_detach(&initialkey); diff --git a/bin/tests/system/tkey/keydelete.c b/bin/tests/system/tkey/keydelete.c index 6175748957..69bec035fa 100644 --- a/bin/tests/system/tkey/keydelete.c +++ b/bin/tests/system/tkey/keydelete.c @@ -163,7 +163,7 @@ main(int argc, char **argv) { RUNCHECK(dst_lib_init(mctx, NULL)); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); RUNCHECK(isc_task_create(taskmgr, 0, &task)); RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); @@ -197,7 +197,7 @@ main(int argc, char **argv) { dns_dispatchmgr_detach(&dispatchmgr); isc_task_shutdown(task); isc_task_detach(&task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); dns_tsigkeyring_detach(&ring); diff --git a/bin/tests/test_client.c b/bin/tests/test_client.c index 743adf44e6..339e664c3a 100644 --- a/bin/tests/test_client.c +++ b/bin/tests/test_client.c @@ -306,7 +306,7 @@ setup(void) { isc_mem_create(&mctx); - isc_managers_create(mctx, workers, 0, 0, &netmgr, NULL, NULL, NULL); + isc_managers_create(mctx, workers, 0, &netmgr, NULL, 
NULL); } static void @@ -315,7 +315,7 @@ teardown(void) { close(out); } - isc_managers_destroy(&netmgr, NULL, NULL, NULL); + isc_managers_destroy(&netmgr, NULL, NULL); isc_mem_destroy(&mctx); if (tls_ctx) { isc_tlsctx_free(&tls_ctx); diff --git a/bin/tests/test_server.c b/bin/tests/test_server.c index 5ef0428eb9..9e28fe8b1c 100644 --- a/bin/tests/test_server.c +++ b/bin/tests/test_server.c @@ -187,12 +187,12 @@ setup(void) { isc_mem_create(&mctx); - isc_managers_create(mctx, workers, 0, 0, &netmgr, NULL, NULL, NULL); + isc_managers_create(mctx, workers, 0, &netmgr, NULL, NULL); } static void teardown(void) { - isc_managers_destroy(&netmgr, NULL, NULL, NULL); + isc_managers_destroy(&netmgr, NULL, NULL); isc_mem_destroy(&mctx); if (tls_ctx) { isc_tlsctx_free(&tls_ctx); diff --git a/bin/tools/mdig.c b/bin/tools/mdig.c index 7f59d5cc01..364b18cbeb 100644 --- a/bin/tools/mdig.c +++ b/bin/tools/mdig.c @@ -2115,7 +2115,7 @@ main(int argc, char *argv[]) { fatal("can't choose between IPv4 and IPv6"); } - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); RUNCHECK(isc_task_create(taskmgr, 0, &task)); RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); @@ -2175,7 +2175,7 @@ main(int argc, char *argv[]) { isc_task_shutdown(task); isc_task_detach(&task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); dst_lib_destroy(); diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst index 1d5ec4f522..07f793e731 100644 --- a/doc/arm/reference.rst +++ b/doc/arm/reference.rst @@ -3046,13 +3046,7 @@ system. most two places after the decimal point are significant. ``reserved-sockets`` - This sets the number of file descriptors reserved for TCP, stdio, etc. This - needs to be big enough to cover the number of interfaces ``named`` - listens on plus ``tcp-clients``, as well as to provide room for - outgoing TCP queries and incoming zone transfers. 
The default is - ``512``. The minimum value is ``128`` and the maximum value is - ``128`` fewer than maxsockets (-S). This option may be removed in the - future. + This option is deprecated and no longer has any effect. ``max-cache-size`` This sets the maximum amount of memory to use for an individual cache diff --git a/doc/dev/dev.md b/doc/dev/dev.md index c68f445ede..086c5a5d40 100644 --- a/doc/dev/dev.md +++ b/doc/dev/dev.md @@ -1370,32 +1370,6 @@ queue, the specified function will be called. Examples: -`isc_socket_recv()` calls the `recv()` system call asynchronously: rather -than waiting for data, it returns immediately, but it sets up an event to -be triggered when the `recv()` call completes; BIND can now do other work -instead of waiting for I/O. Once the `recv()` is finished, the -associated event is triggered. - - - /* - * Function to handle a completed recv() - */ - static void - recvdone(isc_task_t *task, isc_event_t *event) { - /* Arguments are in event->ev_arg. */ - } - - ... - - /* - * Call recv() on socket 'sock', put results into 'region', - * minimum read size 1, and call recvdone() with NULL as - * argument. (Note: 'sock' is already associated with a - * particular task, so that doesn't need to be specified - * here.) - */ - isc_socket_recv(sock, &region, 1, recvdone, NULL); - A timer is set for a specified time in the future, and the event will be triggered at that time. 
diff --git a/doc/man/named.8in b/doc/man/named.8in index 10647c1641..8d24841f90 100644 --- a/doc/man/named.8in +++ b/doc/man/named.8in @@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] named \- Internet domain name server .SH SYNOPSIS .sp -\fBnamed\fP [ [\fB\-4\fP] | [\fB\-6\fP] ] [\fB\-c\fP config\-file] [\fB\-d\fP debug\-level] [\fB\-D\fP string] [\fB\-E\fP engine\-name] [\fB\-f\fP] [\fB\-g\fP] [\fB\-L\fP logfile] [\fB\-M\fP option] [\fB\-m\fP flag] [\fB\-n\fP #cpus] [\fB\-p\fP port] [\fB\-s\fP] [\fB\-S\fP #max\-socks] [\fB\-t\fP directory] [\fB\-U\fP #listeners] [\fB\-u\fP user] [\fB\-v\fP] [\fB\-V\fP] [\fB\-X\fP lock\-file] +\fBnamed\fP [ [\fB\-4\fP] | [\fB\-6\fP] ] [\fB\-c\fP config\-file] [\fB\-d\fP debug\-level] [\fB\-D\fP string] [\fB\-E\fP engine\-name] [\fB\-f\fP] [\fB\-g\fP] [\fB\-L\fP logfile] [\fB\-M\fP option] [\fB\-m\fP flag] [\fB\-n\fP #cpus] [\fB\-p\fP port] [\fB\-s\fP] [\fB\-t\fP directory] [\fB\-U\fP #listeners] [\fB\-u\fP user] [\fB\-v\fP] [\fB\-V\fP] [\fB\-X\fP lock\-file] .SH DESCRIPTION .sp \fBnamed\fP is a Domain Name System (DNS) server, part of the BIND 9 @@ -131,9 +131,7 @@ removed or changed in a future release. .INDENT 0.0 .TP .B \fB\-S #max\-socks\fP -This option allows \fBnamed\fP to use up to \fB#max\-socks\fP sockets. The default value is -21000 on systems built with default configuration options, and 4096 -on systems built with \fBconfigure \-\-with\-tuning=small\fP\&. +This option is deprecated and no longer has any function. 
.UNINDENT .sp \fBWARNING:\fP diff --git a/doc/man/named.conf.5in b/doc/man/named.conf.5in index b5bcfcccf8..7e129e4bf1 100644 --- a/doc/man/named.conf.5in +++ b/doc/man/named.conf.5in @@ -458,7 +458,7 @@ options { request\-ixfr boolean; request\-nsid boolean; require\-server\-cookie boolean; - reserved\-sockets integer; + reserved\-sockets integer;// deprecated resolver\-nonbackoff\-tries integer; resolver\-query\-timeout integer; resolver\-retry\-interval integer; diff --git a/doc/misc/options b/doc/misc/options index fb9db8c4c6..02b6f7b609 100644 --- a/doc/misc/options +++ b/doc/misc/options @@ -316,7 +316,7 @@ options { request-ixfr ; request-nsid ; require-server-cookie ; - reserved-sockets ; + reserved-sockets ; // deprecated resolver-nonbackoff-tries ; resolver-query-timeout ; resolver-retry-interval ; diff --git a/doc/misc/options.active b/doc/misc/options.active index c8d71c5532..491a025ed4 100644 --- a/doc/misc/options.active +++ b/doc/misc/options.active @@ -314,7 +314,7 @@ options { request-ixfr ; request-nsid ; require-server-cookie ; - reserved-sockets ; + reserved-sockets ; // deprecated resolver-nonbackoff-tries ; resolver-query-timeout ; resolver-retry-interval ; diff --git a/doc/misc/options.grammar.rst b/doc/misc/options.grammar.rst index 8adff8b414..2c440420f1 100644 --- a/doc/misc/options.grammar.rst +++ b/doc/misc/options.grammar.rst @@ -234,7 +234,7 @@ request-ixfr ; request-nsid ; require-server-cookie ; - reserved-sockets ; + reserved-sockets ; // deprecated resolver-nonbackoff-tries ; resolver-query-timeout ; resolver-retry-interval ; diff --git a/lib/dns/client.c b/lib/dns/client.c index 1e18f96e69..32f146cd4e 100644 --- a/lib/dns/client.c +++ b/lib/dns/client.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/dns/include/dns/dispatch.h b/lib/dns/include/dns/dispatch.h index a53e66e746..83f40237f8 100644 --- a/lib/dns/include/dns/dispatch.h +++ b/lib/dns/include/dns/dispatch.h @@ -33,8 
+33,7 @@ * * Security: * - *\li Depends on the isc_socket_t and dns_message_t for prevention of - * buffer overruns. + *\li Depends on dns_message_t for prevention of buffer overruns. * * Standards: * @@ -52,7 +51,6 @@ #include #include #include -#include #include #include @@ -187,7 +185,7 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, const isc_sockaddr_t *destaddr, isc_dscp_t dscp, dns_dispatch_t **dispp); /*%< - * Create a new dns_dispatch and attach it to the provided isc_socket_t. + * Create a new TCP dns_dispatch. * * Requires: * diff --git a/lib/dns/include/dns/resolver.h b/lib/dns/include/dns/resolver.h index 63edb07774..3aa0ab80ea 100644 --- a/lib/dns/include/dns/resolver.h +++ b/lib/dns/include/dns/resolver.h @@ -46,8 +46,8 @@ #include #include +#include #include -#include #include #include diff --git a/lib/dns/rbt.c b/lib/dns/rbt.c index b8e8a6c9e6..cf57ef20fb 100644 --- a/lib/dns/rbt.c +++ b/lib/dns/rbt.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 8cc29119f9..08b9c29201 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/dns/request.c b/lib/dns/request.c index 98a389b594..c3516be976 100644 --- a/lib/dns/request.c +++ b/lib/dns/request.c @@ -77,7 +77,6 @@ struct dns_request { dns_requestmgr_t *requestmgr; isc_buffer_t *tsig; dns_tsigkey_t *tsigkey; - isc_socketevent_t sendevent; isc_sockaddr_t destaddr; unsigned int timeout; unsigned int udpcount; diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 8e2e85ae62..04330a3206 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -237,11 +237,10 @@ typedef struct query { isc_buffer_t buffer; isc_buffer_t *tsig; dns_tsigkey_t *tsigkey; - isc_socketevent_t sendevent; isc_dscp_t dscp; int ednsversion; unsigned int options; - isc_sockeventattr_t attributes; + unsigned int 
attributes; unsigned int udpsize; unsigned char data[512]; } resquery_t; diff --git a/lib/dns/tests/dispatch_test.c b/lib/dns/tests/dispatch_test.c index 93eac9f5ae..d965a0c3b8 100644 --- a/lib/dns/tests/dispatch_test.c +++ b/lib/dns/tests/dispatch_test.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -143,8 +142,7 @@ _setup(void **state) { close(sock); /* Create a secondary network manager */ - isc_managers_create(dt_mctx, ncpus, 0, 0, &connect_nm, NULL, NULL, - NULL); + isc_managers_create(dt_mctx, ncpus, 0, &connect_nm, NULL, NULL); isc_nm_settimeouts(netmgr, T_SERVER_INIT, T_SERVER_IDLE, T_SERVER_KEEPALIVE, T_SERVER_ADVERTISED); @@ -170,7 +168,7 @@ _teardown(void **state) { uv_sem_destroy(&sem); - isc_managers_destroy(&connect_nm, NULL, NULL, NULL); + isc_managers_destroy(&connect_nm, NULL, NULL); assert_null(connect_nm); dns_test_end(); diff --git a/lib/dns/tests/dnstest.c b/lib/dns/tests/dnstest.c index e415e7a0e0..de646cb565 100644 --- a/lib/dns/tests/dnstest.c +++ b/lib/dns/tests/dnstest.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -99,7 +98,7 @@ cleanup_managers(void) { isc_managers_destroy(netmgr == NULL ? NULL : &netmgr, taskmgr == NULL ? NULL : &taskmgr, - timermgr == NULL ? NULL : &timermgr, NULL); + timermgr == NULL ? 
NULL : &timermgr); if (app_running) { isc_app_finish(); @@ -111,8 +110,7 @@ create_managers(void) { isc_result_t result; ncpus = isc_os_ncpus(); - isc_managers_create(dt_mctx, ncpus, 0, 0, &netmgr, &taskmgr, &timermgr, - NULL); + isc_managers_create(dt_mctx, ncpus, 0, &netmgr, &taskmgr, &timermgr); CHECK(isc_task_create(taskmgr, 0, &maintask)); return (ISC_R_SUCCESS); diff --git a/lib/dns/tests/rbt_test.c b/lib/dns/tests/rbt_test.c index 3839ebc0cb..ad761efd91 100644 --- a/lib/dns/tests/rbt_test.c +++ b/lib/dns/tests/rbt_test.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/dns/tests/resolver_test.c b/lib/dns/tests/resolver_test.c index 866498e35e..8113693eac 100644 --- a/lib/dns/tests/resolver_test.c +++ b/lib/dns/tests/resolver_test.c @@ -23,8 +23,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/lib/isc/Makefile.am b/lib/isc/Makefile.am index 639b1b7b2d..70358f8a82 100644 --- a/lib/isc/Makefile.am +++ b/lib/isc/Makefile.am @@ -82,7 +82,6 @@ libisc_la_HEADERS = \ include/isc/serial.h \ include/isc/siphash.h \ include/isc/sockaddr.h \ - include/isc/socket.h \ include/isc/stat.h \ include/isc/stats.h \ include/isc/stdatomic.h \ @@ -186,8 +185,6 @@ libisc_la_SOURCES = \ serial.c \ siphash.c \ sockaddr.c \ - socket.c \ - socket_p.h \ stats.c \ stdio.c \ stdtime.c \ diff --git a/lib/isc/httpd.c b/lib/isc/httpd.c index 5daf349426..b71d323823 100644 --- a/lib/isc/httpd.c +++ b/lib/isc/httpd.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/isc/include/isc/managers.h b/lib/isc/include/isc/managers.h index 3b3227b09e..8543dbdcf9 100644 --- a/lib/isc/include/isc/managers.h +++ b/lib/isc/include/isc/managers.h @@ -13,7 +13,6 @@ #include #include -#include #include #include @@ -21,10 +20,9 @@ typedef struct isc_managers isc_managers_t; isc_result_t isc_managers_create(isc_mem_t *mctx, size_t workers, size_t quantum, - 
size_t sockets, isc_nm_t **netmgrp, - isc_taskmgr_t **taskmgrp, isc_timermgr_t **timermgrp, - isc_socketmgr_t **socketmgrp); + isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, + isc_timermgr_t **timermgrp); void isc_managers_destroy(isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, - isc_timermgr_t **timermgrp, isc_socketmgr_t **socketmgrp); + isc_timermgr_t **timermgrp); diff --git a/lib/isc/include/isc/socket.h b/lib/isc/include/isc/socket.h deleted file mode 100644 index 10ac659a8b..0000000000 --- a/lib/isc/include/isc/socket.h +++ /dev/null @@ -1,842 +0,0 @@ -/* - * Copyright (C) Internet Systems Consortium, Inc. ("ISC") - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, you can obtain one at https://mozilla.org/MPL/2.0/. - * - * See the COPYRIGHT file distributed with this work for additional - * information regarding copyright ownership. - */ - -#pragma once - -/***** -***** Module Info -*****/ - -/*! \file isc/socket.h - * \brief Provides TCP and UDP sockets for network I/O. The sockets are event - * sources in the task system. - * - * When I/O completes, a completion event for the socket is posted to the - * event queue of the task which requested the I/O. - * - * \li MP: - * The module ensures appropriate synchronization of data structures it - * creates and manipulates. - * Clients of this module must not be holding a socket's task's lock when - * making a call that affects that socket. Failure to follow this rule - * can result in deadlock. - * The caller must ensure that isc_socketmgr_destroy() is called only - * once for a given manager. - * - * \li Reliability: - * No anticipated impact. - * - * \li Resources: - * TBS - * - * \li Security: - * No anticipated impact. - * - * \li Standards: - * None. 
- */ - -/*** - *** Imports - ***/ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -ISC_LANG_BEGINDECLS - -/*** - *** Constants - ***/ - -/*% - * Maximum number of buffers in a scatter/gather read/write. The operating - * system in use must support at least this number (plus one on some.) - */ -#define ISC_SOCKET_MAXSCATTERGATHER 8 - -/*@{*/ -/*! - * Socket options: - * - * _REUSEADDRESS: Set SO_REUSEADDR prior to calling bind(), - * if a non-zero port is specified (applies to - * AF_INET and AF_INET6). - */ -typedef enum { - ISC_SOCKET_REUSEADDRESS = 0x01U, -} isc_socket_options_t; -/*@}*/ - -/*@{*/ -/*! - * _ATTACHED: Internal use only. - * _TRUNC: Packet was truncated on receive. - * _CTRUNC: Packet control information was truncated. This can - * indicate that the packet is not complete, even though - * all the data is valid. - * _TIMESTAMP: The timestamp member is valid. - * _PKTINFO: The pktinfo member is valid. - * _MULTICAST: The UDP packet was received via a multicast transmission. - * _DSCP: The UDP DSCP value is valid. - * _USEMINMTU: Set the per packet IPV6_USE_MIN_MTU flag. 
- */ -typedef enum { - ISC_SOCKEVENTATTR_ATTACHED = 0x10000000U, /* internal */ - ISC_SOCKEVENTATTR_TRUNC = 0x00800000U, /* public */ - ISC_SOCKEVENTATTR_CTRUNC = 0x00400000U, /* public */ - ISC_SOCKEVENTATTR_TIMESTAMP = 0x00200000U, /* public */ - ISC_SOCKEVENTATTR_PKTINFO = 0x00100000U, /* public */ - ISC_SOCKEVENTATTR_MULTICAST = 0x00080000U, /* public */ - ISC_SOCKEVENTATTR_DSCP = 0x00040000U, /* public */ - ISC_SOCKEVENTATTR_USEMINMTU = 0x00020000U /* public */ -} isc_sockeventattr_t; -/*@}*/ - -/*** - *** Types - ***/ - -struct isc_socketevent { - ISC_EVENT_COMMON(isc_socketevent_t); - isc_result_t result; /*%< OK, EOF, whatever else */ - unsigned int minimum; /*%< minimum i/o for event */ - unsigned int n; /*%< bytes read or written */ - unsigned int offset; /*%< offset into buffer list */ - isc_region_t region; /*%< for single-buffer i/o */ - isc_sockaddr_t address; /*%< source address */ - isc_time_t timestamp; /*%< timestamp of packet recv */ - struct in6_pktinfo pktinfo; /*%< ipv6 pktinfo */ - isc_sockeventattr_t attributes; /*%< see isc_sockeventattr_t - * enum */ - isc_eventdestructor_t destroy; /*%< original destructor */ - unsigned int dscp; /*%< UDP dscp value */ -}; - -typedef struct isc_socket_newconnev isc_socket_newconnev_t; -struct isc_socket_newconnev { - ISC_EVENT_COMMON(isc_socket_newconnev_t); - isc_socket_t *newsocket; - isc_result_t result; /*%< OK, EOF, whatever else */ - isc_sockaddr_t address; /*%< source address */ -}; - -typedef struct isc_socket_connev isc_socket_connev_t; -struct isc_socket_connev { - ISC_EVENT_COMMON(isc_socket_connev_t); - isc_result_t result; /*%< OK, EOF, whatever else */ -}; - -#define ISC_SOCKEVENT_ANYEVENT (0) -#define ISC_SOCKEVENT_RECVDONE (ISC_EVENTCLASS_SOCKET + 1) -#define ISC_SOCKEVENT_SENDDONE (ISC_EVENTCLASS_SOCKET + 2) -#define ISC_SOCKEVENT_NEWCONN (ISC_EVENTCLASS_SOCKET + 3) -#define ISC_SOCKEVENT_CONNECT (ISC_EVENTCLASS_SOCKET + 4) - -/* - * Internal events. 
- */ -#define ISC_SOCKEVENT_INTR (ISC_EVENTCLASS_SOCKET + 256) -#define ISC_SOCKEVENT_INTW (ISC_EVENTCLASS_SOCKET + 257) - -typedef enum { - isc_sockettype_udp = 1, - isc_sockettype_tcp = 2, - isc_sockettype_unix = 3, - isc_sockettype_raw = 4 -} isc_sockettype_t; - -/*@{*/ -/*! - * How a socket should be shutdown in isc_socket_shutdown() calls. - */ -#define ISC_SOCKSHUT_RECV 0x00000001 /*%< close read side */ -#define ISC_SOCKSHUT_SEND 0x00000002 /*%< close write side */ -#define ISC_SOCKSHUT_ALL 0x00000003 /*%< close them all */ -/*@}*/ - -/*@{*/ -/*! - * What I/O events to cancel in isc_socket_cancel() calls. - */ -#define ISC_SOCKCANCEL_RECV 0x00000001 /*%< cancel recv */ -#define ISC_SOCKCANCEL_SEND 0x00000002 /*%< cancel send */ -#define ISC_SOCKCANCEL_ACCEPT 0x00000004 /*%< cancel accept */ -#define ISC_SOCKCANCEL_CONNECT 0x00000008 /*%< cancel connect */ -#define ISC_SOCKCANCEL_ALL 0x0000000f /*%< cancel everything */ -/*@}*/ - -/*@{*/ -/*! - * Flags for isc_socket_send() and isc_socket_recv() calls. - */ -#define ISC_SOCKFLAG_IMMEDIATE 0x00000001 /*%< send event only if needed */ -#define ISC_SOCKFLAG_NORETRY 0x00000002 /*%< drop failed UDP sends */ -/*@}*/ - -/*** - *** Socket and Socket Manager Functions - *** - *** Note: all Ensures conditions apply only if the result is success for - *** those functions which return an isc_result. - ***/ - -isc_result_t -isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, - isc_socket_t **socketp); -/*%< - * Create a new 'type' socket managed by 'manager'. - * - * Note: - * - *\li 'pf' is the desired protocol family, e.g. PF_INET or PF_INET6. 
- * - * Requires: - * - *\li 'manager' is a valid manager - * - *\li 'socketp' is a valid pointer, and *socketp == NULL - * - * Ensures: - * - * '*socketp' is attached to the newly created socket - * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_NOMEMORY - *\li #ISC_R_NORESOURCES - *\li #ISC_R_UNEXPECTED - */ - -void -isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how); -/*%< - * Cancel pending I/O of the type specified by "how". - * - * Note: if "task" is NULL, then the cancel applies to all tasks using the - * socket. - * - * Requires: - * - * \li "socket" is a valid socket - * - * \li "task" is NULL or a valid task - * - * "how" is a bitmask describing the type of cancellation to perform. - * The type ISC_SOCKCANCEL_ALL will cancel all pending I/O on this - * socket. - * - * \li ISC_SOCKCANCEL_RECV: - * Cancel pending isc_socket_recv() calls. - * - * \li ISC_SOCKCANCEL_SEND: - * Cancel pending isc_socket_send() and isc_socket_sendto() calls. - * - * \li ISC_SOCKCANCEL_ACCEPT: - * Cancel pending isc_socket_accept() calls. - * - * \li ISC_SOCKCANCEL_CONNECT: - * Cancel pending isc_socket_connect() call. - */ - -void -isc_socket_shutdown(isc_socket_t *sock, unsigned int how); -/*%< - * Shutdown 'socket' according to 'how'. - * - * Requires: - * - * \li 'socket' is a valid socket. - * - * \li 'task' is NULL or is a valid task. - * - * \li If 'how' is 'ISC_SOCKSHUT_RECV' or 'ISC_SOCKSHUT_ALL' then - * - * The read queue must be empty. - * - * No further read requests may be made. - * - * \li If 'how' is 'ISC_SOCKSHUT_SEND' or 'ISC_SOCKSHUT_ALL' then - * - * The write queue must be empty. - * - * No further write requests may be made. - */ - -void -isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp); -/*%< - * Attach *socketp to socket. - * - * Requires: - * - * \li 'socket' is a valid socket. - * - * \li 'socketp' points to a NULL socket. - * - * Ensures: - * - * \li *socketp is attached to socket. 
- */ - -void -isc_socket_detach(isc_socket_t **socketp); -/*%< - * Detach *socketp from its socket. - * - * Requires: - * - * \li 'socketp' points to a valid socket. - * - * \li If '*socketp' is the last reference to the socket, - * then: - * - * There must be no pending I/O requests. - * - * Ensures: - * - * \li *socketp is NULL. - * - * \li If '*socketp' is the last reference to the socket, - * then: - * - * The socket will be shutdown (both reading and writing) - * for all tasks. - * - * All resources used by the socket have been freed - */ - -isc_result_t -isc_socket_open(isc_socket_t *sock); -/*%< - * Open a new socket file descriptor of the given socket structure. It simply - * opens a new descriptor; all of the other parameters including the socket - * type are inherited from the existing socket. This function is provided to - * avoid overhead of destroying and creating sockets when many short-lived - * sockets are frequently opened and closed. When the efficiency is not an - * issue, it should be safer to detach the unused socket and re-create a new - * one. This optimization may not be available for some systems, in which - * case this function will return ISC_R_NOTIMPLEMENTED and must not be used. - * - * Requires: - * - * \li there must be no other reference to this socket. - * - * \li 'socket' is a valid and previously closed by isc_socket_close() - * - * Returns: - * Same as isc_socket_create(). - * \li ISC_R_NOTIMPLEMENTED - */ - -isc_result_t -isc_socket_close(isc_socket_t *sock); -/*%< - * Close a socket file descriptor of the given socket structure. This function - * is provided as an alternative to destroying an unused socket when overhead - * destroying/re-creating sockets can be significant, and is expected to be - * used with isc_socket_open(). This optimization may not be available for some - * systems, in which case this function will return ISC_R_NOTIMPLEMENTED and - * must not be used. 
- * - * Requires: - * - * \li The socket must have a valid descriptor. - * - * \li There must be no other reference to this socket. - * - * \li There must be no pending I/O requests. - * - * Returns: - * \li #ISC_R_NOTIMPLEMENTED - */ - -isc_result_t -isc_socket_bind(isc_socket_t *sock, const isc_sockaddr_t *addressp, - isc_socket_options_t options); -/*%< - * Bind 'socket' to '*addressp'. - * - * Requires: - * - * \li 'socket' is a valid socket - * - * \li 'addressp' points to a valid isc_sockaddr. - * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_NOPERM - * \li ISC_R_ADDRNOTAVAIL - * \li ISC_R_ADDRINUSE - * \li ISC_R_BOUND - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_filter(isc_socket_t *sock, const char *filter); -/*%< - * Inform the kernel that it should perform accept filtering. - * If filter is NULL the current filter will be removed. - */ - -isc_result_t -isc_socket_listen(isc_socket_t *sock, unsigned int backlog); -/*%< - * Set listen mode on the socket. After this call, the only function that - * can be used (other than attach and detach) is isc_socket_accept(). - * - * Notes: - * - * \li 'backlog' is as in the UNIX system call listen() and may be - * ignored by non-UNIX implementations. - * - * \li If 'backlog' is zero, a reasonable system default is used, usually - * SOMAXCONN. - * - * Requires: - * - * \li 'socket' is a valid, bound TCP socket or a valid, bound UNIX socket. - * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_accept(isc_socket_t *sock, isc_task_t *task, isc_taskaction_t action, - void *arg); -/*%< - * Queue accept event. When a new connection is received, the task will - * get an ISC_SOCKEVENT_NEWCONN event with the sender set to the listen - * socket. The new socket structure is sent inside the isc_socket_newconnev_t - * event type, and is attached to the task 'task'. - * - * REQUIRES: - * \li 'socket' is a valid TCP socket that isc_socket_listen() was called - * on. 
- * - * \li 'task' is a valid task - * - * \li 'action' is a valid action - * - * RETURNS: - * \li ISC_R_SUCCESS - * \li ISC_R_NOMEMORY - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_connect(isc_socket_t *sock, const isc_sockaddr_t *addressp, - isc_task_t *task, isc_taskaction_t action, void *arg); -/*%< - * Connect 'socket' to peer with address *saddr. When the connection - * succeeds, or when an error occurs, a CONNECT event with action 'action' - * and arg 'arg' will be posted to the event queue for 'task'. - * - * Requires: - * - * \li 'socket' is a valid TCP socket - * - * \li 'addressp' points to a valid isc_sockaddr - * - * \li 'task' is a valid task - * - * \li 'action' is a valid action - * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_NOMEMORY - * \li ISC_R_UNEXPECTED - * - * Posted event's result code: - * - * \li ISC_R_SUCCESS - * \li ISC_R_TIMEDOUT - * \li ISC_R_CONNREFUSED - * \li ISC_R_NETUNREACH - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp); -/*%< - * Get the name of the peer connected to 'socket'. - * - * Requires: - * - * \li 'socket' is a valid TCP socket. - * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_TOOSMALL - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp); -/*%< - * Get the name of 'socket'. - * - * Requires: - * - * \li 'socket' is a valid socket. - * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_TOOSMALL - * \li ISC_R_UNEXPECTED - */ - -/*@{*/ -isc_result_t -isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, - isc_task_t *task, isc_taskaction_t action, void *arg); - -isc_result_t -isc_socket_recv2(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, - isc_task_t *task, isc_socketevent_t *event, - unsigned int flags); - -/*! - * Receive from 'socket', storing the results in region. 
- * - * Notes: - * - *\li Let 'length' refer to the length of 'region' or to the sum of all - * available regions in the list of buffers '*buflist'. - * - *\li If 'minimum' is non-zero and at least that many bytes are read, - * the completion event will be posted to the task 'task.' If minimum - * is zero, the exact number of bytes requested in the region must - * be read for an event to be posted. This only makes sense for TCP - * connections, and is always set to 1 byte for UDP. - * - *\li The read will complete when the desired number of bytes have been - * read, if end-of-input occurs, or if an error occurs. A read done - * event with the given 'action' and 'arg' will be posted to the - * event queue of 'task'. - * - *\li The caller may not modify 'region', the buffers which are passed - * into this function, or any data they refer to until the completion - * event is received. - * - *\li For isc_socket_recv2(): - * 'event' is not NULL, and the non-socket specific fields are - * expected to be initialized. - * - *\li For isc_socket_recv2(): - * The only defined value for 'flags' is ISC_SOCKFLAG_IMMEDIATE. If - * set and the operation completes, the return value will be - * ISC_R_SUCCESS and the event will be filled in and not sent. If the - * operation does not complete, the return value will be - * ISC_R_INPROGRESS and the event will be sent when the operation - * completes. - * - * Requires: - * - *\li 'socket' is a valid, bound socket. - * - *\li For isc_socket_recv(): - * 'region' is a valid region - * - *\li For isc_socket_recvv(): - * 'buflist' is non-NULL, and '*buflist' contain at least one buffer. 
- * - *\li 'task' is a valid task - * - *\li For isc_socket_recv() and isc_socket_recvv(): - * action != NULL and is a valid action - * - *\li For isc_socket_recv2(): - * event != NULL - * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_INPROGRESS - *\li #ISC_R_NOMEMORY - *\li #ISC_R_UNEXPECTED - * - * Event results: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_UNEXPECTED - *\li XXX needs other net-type errors - */ -/*@}*/ - -/*@{*/ -isc_result_t -isc_socket_send(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - isc_taskaction_t action, void *arg); -isc_result_t -isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - isc_taskaction_t action, void *arg, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo); -isc_result_t -isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, - isc_socketevent_t *event, unsigned int flags); - -/*! - * Send the contents of 'region' to the socket's peer. - * - * Notes: - * - *\li Shutting down the requestor's task *may* result in any - * still pending writes being dropped or completed, depending on the - * underlying OS implementation. - * - *\li If 'action' is NULL, then no completion event will be posted. - * - *\li The caller may not modify 'region', the buffers which are passed - * into this function, or any data they refer to until the completion - * event is received. - * - *\li For isc_socket_sendto2(): - * 'event' is not NULL, and the non-socket specific fields are - * expected to be initialized. - * - *\li For isc_socket_sendto2(): - * The only defined values for 'flags' are ISC_SOCKFLAG_IMMEDIATE - * and ISC_SOCKFLAG_NORETRY. - * - *\li If ISC_SOCKFLAG_IMMEDIATE is set and the operation completes, the - * return value will be ISC_R_SUCCESS and the event will be filled - * in and not sent. 
If the operation does not complete, the return - * value will be ISC_R_INPROGRESS and the event will be sent when - * the operation completes. - * - *\li ISC_SOCKFLAG_NORETRY can only be set for UDP sockets. If set - * and the send operation fails due to a transient error, the send - * will not be retried and the error will be indicated in the event. - * Using this option along with ISC_SOCKFLAG_IMMEDIATE allows the caller - * to specify a region that is allocated on the stack. - * - * Requires: - * - *\li 'socket' is a valid, bound socket. - * - *\li For isc_socket_send(): - * 'region' is a valid region - * - *\li For isc_socket_sendv() and isc_socket_sendtov(): - * 'buflist' is non-NULL, and '*buflist' contain at least one buffer. - * - *\li 'task' is a valid task - * - *\li For isc_socket_sendv(), isc_socket_sendtov(), isc_socket_send(), and - * isc_socket_sendto(): - * action == NULL or is a valid action - * - *\li For isc_socket_sendto2(): - * event != NULL - * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_INPROGRESS - *\li #ISC_R_NOMEMORY - *\li #ISC_R_UNEXPECTED - * - * Event results: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_UNEXPECTED - *\li XXX needs other net-type errors - */ -/*@}*/ - -isc_result_t -isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp); -/*%< - * Returns in "*nsockp" the maximum number of sockets this manager may open. - * - * Requires: - * - *\li '*manager' is a valid isc_socketmgr_t. - *\li 'nsockp' is not NULL. - * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_NOTIMPLEMENTED - */ - -void -isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats); -/*%< - * Set a general socket statistics counter set 'stats' for 'manager'. - * - * Requires: - * \li 'manager' is valid, hasn't opened any socket, and doesn't have - * stats already set. - * - *\li stats is a valid statistics supporting socket statistics counters - * (see above). 
- */ - -isc_sockettype_t -isc_socket_gettype(isc_socket_t *sock); -/*%< - * Returns the socket type for "sock." - * - * Requires: - * - *\li "sock" is a valid socket. - */ - -/*@{*/ -void -isc_socket_ipv6only(isc_socket_t *sock, bool yes); -/*%< - * If the socket is an IPv6 socket set/clear the IPV6_IPV6ONLY socket - * option if the host OS supports this option. - * - * Requires: - *\li 'sock' is a valid socket. - */ -/*@}*/ - -void -isc_socket_dscp(isc_socket_t *sock, isc_dscp_t dscp); -/*%< - * Sets the Differentiated Services Code Point (DSCP) field for packets - * transmitted on this socket. If 'dscp' is -1, return immediately. - * - * Requires: - *\li 'sock' is a valid socket. - */ - -isc_socketevent_t * -isc_socket_socketevent(isc_mem_t *mctx, void *sender, isc_eventtype_t eventtype, - isc_taskaction_t action, void *arg); -/*%< - * Get a isc_socketevent_t to be used with isc_socket_sendto2(), etc. - */ - -void -isc_socket_cleanunix(const isc_sockaddr_t *addr, bool active); - -/*%< - * Cleanup UNIX domain sockets in the file-system. If 'active' is true - * then just unlink the socket. If 'active' is false try to determine - * if there is a listener of the socket or not. If no listener is found - * then unlink socket. - * - * Prior to unlinking the path is tested to see if it a socket. - * - * Note: there are a number of race conditions which cannot be avoided - * both in the filesystem and any application using UNIX domain - * sockets (e.g. socket is tested between bind() and listen(), - * the socket is deleted and replaced in the file-system between - * stat() and unlink()). - */ - -isc_result_t -isc_socket_permunix(const isc_sockaddr_t *sockaddr, uint32_t perm, - uint32_t owner, uint32_t group); -/*%< - * Set ownership and file permissions on the UNIX domain socket. - * - * Note: On Solaris this secures the directory containing - * the socket as Solaris do not honour the filesystem - * permissions on the socket. 
- * - * Requires: - * \li 'sockaddr' to be a valid UNIX domain sockaddr. - * - * Returns: - * \li #ISC_R_SUCCESS - * \li #ISC_R_FAILURE - */ - -void -isc_socket_setname(isc_socket_t *socket, const char *name, void *tag); -/*%< - * Set the name and optional tag for a socket. This allows tracking of the - * owner or purpose for this socket, and is useful for tracing and statistics - * reporting. - */ - -const char * -isc_socket_getname(isc_socket_t *socket); -/*%< - * Get the name associated with a socket, if any. - */ - -void * -isc_socket_gettag(isc_socket_t *socket); -/*%< - * Get the tag associated with a socket, if any. - */ - -int -isc_socket_getfd(isc_socket_t *socket); -/*%< - * Get the file descriptor associated with a socket - */ - -void -isc_socketmgr_setreserved(isc_socketmgr_t *mgr, uint32_t); -/*%< - * Temporary. For use by named only. - */ - -void -isc_socketmgr_maxudp(isc_socketmgr_t *mgr, unsigned int maxudp); -/*%< - * Test interface. Drop UDP packet > 'maxudp'. - */ - -bool -isc_socket_hasreuseport(void); -/*%< - * Return true if there is SO_REUSEPORT support - */ - -#ifdef HAVE_LIBXML2 -int -isc_socketmgr_renderxml(isc_socketmgr_t *mgr, void *writer0); -/*%< - * Render internal statistics and other state into the XML document. - */ -#endif /* HAVE_LIBXML2 */ - -#ifdef HAVE_JSON_C -isc_result_t -isc_socketmgr_renderjson(isc_socketmgr_t *mgr, void *stats0); -/*%< - * Render internal statistics and other state into JSON format. - */ -#endif /* HAVE_JSON_C */ - -/*%< - * See isc_socketmgr_create() above. 
- */ -typedef isc_result_t (*isc_socketmgrcreatefunc_t)(isc_mem_t *mctx, - isc_socketmgr_t **managerp); - -ISC_LANG_ENDDECLS diff --git a/lib/isc/include/isc/types.h b/lib/isc/include/isc/types.h index 78ed309f12..b68365cd93 100644 --- a/lib/isc/include/isc/types.h +++ b/lib/isc/include/isc/types.h @@ -77,13 +77,10 @@ typedef struct isc_rwlock isc_rwlock_t; /*%< Read Write Lock */ typedef struct isc_sockaddr isc_sockaddr_t; /*%< Socket Address */ typedef ISC_LIST(isc_sockaddr_t) isc_sockaddrlist_t; /*%< Socket Address List * */ -typedef struct isc_socket isc_socket_t; /*%< Socket */ -typedef struct isc_socketevent isc_socketevent_t; /*%< Socket Event */ -typedef struct isc_socketmgr isc_socketmgr_t; /*%< Socket Manager */ -typedef struct isc_stats isc_stats_t; /*%< Statistics */ -typedef int_fast64_t isc_statscounter_t; -typedef struct isc_symtab isc_symtab_t; /*%< Symbol Table */ -typedef struct isc_task isc_task_t; /*%< Task */ +typedef struct isc_stats isc_stats_t; /*%< Statistics */ +typedef int_fast64_t isc_statscounter_t; +typedef struct isc_symtab isc_symtab_t; /*%< Symbol Table */ +typedef struct isc_task isc_task_t; /*%< Task */ typedef ISC_LIST(isc_task_t) isc_tasklist_t; /*%< Task List */ typedef struct isc_taskmgr isc_taskmgr_t; /*%< Task Manager */ typedef struct isc_textregion isc_textregion_t; /*%< Text Region */ diff --git a/lib/isc/managers.c b/lib/isc/managers.c index 628dd33fa9..094e4c0eb8 100644 --- a/lib/isc/managers.c +++ b/lib/isc/managers.c @@ -14,18 +14,15 @@ #include #include "netmgr_p.h" -#include "socket_p.h" #include "task_p.h" #include "timer_p.h" isc_result_t isc_managers_create(isc_mem_t *mctx, size_t workers, size_t quantum, - size_t sockets, isc_nm_t **netmgrp, - isc_taskmgr_t **taskmgrp, isc_timermgr_t **timermgrp, - isc_socketmgr_t **socketmgrp) { + isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, + isc_timermgr_t **timermgrp) { isc_result_t result; isc_nm_t *netmgr = NULL; - isc_socketmgr_t *socketmgr = NULL; isc_taskmgr_t *taskmgr 
= NULL; isc_timermgr_t *timermgr = NULL; @@ -65,29 +62,16 @@ isc_managers_create(isc_mem_t *mctx, size_t workers, size_t quantum, *timermgrp = timermgr; } - REQUIRE(socketmgrp == NULL || *socketmgrp == NULL); - if (socketmgrp != NULL) { - result = isc__socketmgr_create(mctx, &socketmgr, sockets, - workers); - if (result != ISC_R_SUCCESS) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_socketmgr_create() failed: %s", - isc_result_totext(result)); - goto fail; - } - *socketmgrp = socketmgr; - } - return (ISC_R_SUCCESS); fail: - isc_managers_destroy(netmgrp, taskmgrp, timermgrp, socketmgrp); + isc_managers_destroy(netmgrp, taskmgrp, timermgrp); return (result); } void isc_managers_destroy(isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, - isc_timermgr_t **timermgrp, isc_socketmgr_t **socketmgrp) { + isc_timermgr_t **timermgrp) { /* * If we have a taskmgr to clean up, then we must also have a netmgr. */ @@ -137,8 +121,4 @@ isc_managers_destroy(isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, INSIST(*timermgrp != NULL); isc__timermgr_destroy(timermgrp); } - if (socketmgrp != NULL) { - INSIST(*socketmgrp != NULL); - isc__socketmgr_destroy(socketmgrp); - } } diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c index be3f90821c..d37d788e87 100644 --- a/lib/isc/netmgr/http.c +++ b/lib/isc/netmgr/http.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 571b064759..9b884e0a93 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -128,6 +128,13 @@ static const isc_statscounter_t unixstatsindex[] = { static thread_local int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN; +/* + * Set by the -T dscp option on the command line. If set to a value + * other than -1, we check to make sure DSCP values match it, and + * assert if not. (Not currently in use.) 
+ */ +int isc_dscp_check_value = -1; + static void nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG); static void diff --git a/lib/isc/socket.c b/lib/isc/socket.c deleted file mode 100644 index 1f3b107bf9..0000000000 --- a/lib/isc/socket.c +++ /dev/null @@ -1,5445 +0,0 @@ -/* - * Copyright (C) Internet Systems Consortium, Inc. ("ISC") - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, you can obtain one at https://mozilla.org/MPL/2.0/. - * - * See the COPYRIGHT file distributed with this work for additional - * information regarding copyright ownership. - */ - -/*! \file */ - -#include -#include -#include -#include -#include -#include -#if defined(HAVE_SYS_SYSCTL_H) && !defined(__linux__) -#include -#endif /* if defined(HAVE_SYS_SYSCTL_H) && !defined(__linux__) */ -#include -#include - -#if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) -#include -#include -#endif /* if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) \ - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_KQUEUE -#include -#endif /* ifdef HAVE_KQUEUE */ -#ifdef HAVE_EPOLL_CREATE1 -#include -#endif /* ifdef HAVE_EPOLL_CREATE1 */ -#if defined(HAVE_SYS_DEVPOLL_H) -#include -#elif defined(HAVE_DEVPOLL_H) -#include -#endif /* if defined(HAVE_SYS_DEVPOLL_H) */ - -#include - -#include "errno2result.h" -#include "socket_p.h" - -#ifdef ENABLE_TCP_FASTOPEN -#include -#endif /* ifdef ENABLE_TCP_FASTOPEN */ - -#ifdef HAVE_JSON_C -#include -#endif /* HAVE_JSON_C */ - -#ifdef HAVE_LIBXML2 -#include -#define ISC_XMLCHAR (const xmlChar *) -#endif /* HAVE_LIBXML2 */ - -/*% - * Choose the most preferable multiplex method. 
- */ -#if defined(HAVE_KQUEUE) -#define USE_KQUEUE -#elif defined(HAVE_EPOLL_CREATE1) -#define USE_EPOLL -#elif defined(HAVE_SYS_DEVPOLL_H) || defined(HAVE_DEVPOLL_H) -#define USE_DEVPOLL -typedef struct { - unsigned int want_read : 1, want_write : 1; -} pollinfo_t; -#else /* if defined(HAVE_KQUEUE) */ -#define USE_SELECT -#endif /* HAVE_KQUEUE */ - -/* - * Set by the -T dscp option on the command line. If set to a value - * other than -1, we check to make sure DSCP values match it, and - * assert if not. - */ -int isc_dscp_check_value = -1; - -/*% - * Maximum number of allowable open sockets. This is also the maximum - * allowable socket file descriptor. - * - * Care should be taken before modifying this value for select(): - * The API standard doesn't ensure select() accept more than (the system default - * of) FD_SETSIZE descriptors, and the default size should in fact be fine in - * the vast majority of cases. This constant should therefore be increased only - * when absolutely necessary and possible, i.e., the server is exhausting all - * available file descriptors (up to FD_SETSIZE) and the select() function - * and FD_xxx macros support larger values than FD_SETSIZE (which may not - * always by true, but we keep using some of them to ensure as much - * portability as possible). Note also that overall server performance - * may be rather worsened with a larger value of this constant due to - * inherent scalability problems of select(). - * - * As a special note, this value shouldn't have to be touched if - * this is a build for an authoritative only DNS server. - */ -#ifndef ISC_SOCKET_MAXSOCKETS -#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) -#ifdef TUNE_LARGE -#define ISC_SOCKET_MAXSOCKETS 21000 -#else /* ifdef TUNE_LARGE */ -#define ISC_SOCKET_MAXSOCKETS 4096 -#endif /* TUNE_LARGE */ -#elif defined(USE_SELECT) -#define ISC_SOCKET_MAXSOCKETS FD_SETSIZE -#endif /* USE_KQUEUE... 
*/ -#endif /* ISC_SOCKET_MAXSOCKETS */ - -#ifdef USE_SELECT -/*% - * Mac OS X needs a special definition to support larger values in select(). - * We always define this because a larger value can be specified run-time. - */ -#ifdef __APPLE__ -#define _DARWIN_UNLIMITED_SELECT -#endif /* __APPLE__ */ -#endif /* USE_SELECT */ - -#ifdef ISC_SOCKET_USE_POLLWATCH -/*% - * If this macro is defined, enable workaround for a Solaris /dev/poll kernel - * bug: DP_POLL ioctl could keep sleeping even if socket I/O is possible for - * some of the specified FD. The idea is based on the observation that it's - * likely for a busy server to keep receiving packets. It specifically works - * as follows: the socket watcher is first initialized with the state of - * "poll_idle". While it's in the idle state it keeps sleeping until a socket - * event occurs. When it wakes up for a socket I/O event, it moves to the - * poll_active state, and sets the poll timeout to a short period - * (ISC_SOCKET_POLLWATCH_TIMEOUT msec). If timeout occurs in this state, the - * watcher goes to the poll_checking state with the same timeout period. - * In this state, the watcher tries to detect whether this is a break - * during intermittent events or the kernel bug is triggered. If the next - * polling reports an event within the short period, the previous timeout is - * likely to be a kernel bug, and so the watcher goes back to the active state. - * Otherwise, it moves to the idle state again. - * - * It's not clear whether this is a thread-related bug, but since we've only - * seen this with threads, this workaround is used only when enabling threads. - */ - -typedef enum { poll_idle, poll_active, poll_checking } pollstate_t; - -#ifndef ISC_SOCKET_POLLWATCH_TIMEOUT -#define ISC_SOCKET_POLLWATCH_TIMEOUT 10 -#endif /* ISC_SOCKET_POLLWATCH_TIMEOUT */ -#endif /* ISC_SOCKET_USE_POLLWATCH */ - -/*% - * Per-FD lock buckets, we shuffle them around a bit as FDs come in herds. 
- */ -#define FDLOCK_BITS 10 -#define FDLOCK_COUNT (1 << FDLOCK_BITS) -#define FDLOCK_ID(fd) \ - (((fd) % (FDLOCK_COUNT) >> (FDLOCK_BITS / 2)) | \ - (((fd) << (FDLOCK_BITS / 2)) % (FDLOCK_COUNT))) - -/*% - * Maximum number of events communicated with the kernel. There should normally - * be no need for having a large number. - */ -#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) -#ifndef ISC_SOCKET_MAXEVENTS -#ifdef TUNE_LARGE -#define ISC_SOCKET_MAXEVENTS 2048 -#else /* ifdef TUNE_LARGE */ -#define ISC_SOCKET_MAXEVENTS 64 -#endif /* TUNE_LARGE */ -#endif /* ifndef ISC_SOCKET_MAXEVENTS */ -#endif /* if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) \ - * */ - -/*% - * Some systems define the socket length argument as an int, some as size_t, - * some as socklen_t. This is here so it can be easily changed if needed. - */ -#ifndef socklen_t -#define socklen_t unsigned int -#endif /* ifndef socklen_t */ - -/*% - * Define what the possible "soft" errors can be. These are non-fatal returns - * of various network related functions, like recv() and so on. - * - * For some reason, BSDI (and perhaps others) will sometimes return <0 - * from recv() but will have errno==0. This is broken, but we have to - * work around it here. - */ -#define SOFT_ERROR(e) \ - ((e) == EAGAIN || (e) == EWOULDBLOCK || (e) == ENOBUFS || \ - (e) == EINTR || (e) == 0) - -#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x) - -/*!< - * DLVL(90) -- Function entry/exit and other tracing. - * DLVL(70) -- Socket "correctness" -- including returning of events, etc. - * DLVL(60) -- Socket data send/receive - * DLVL(50) -- Event tracing, including receiving/sending completion events. - * DLVL(20) -- Socket creation/destruction. 
- */ -#define TRACE_LEVEL 90 -#define CORRECTNESS_LEVEL 70 -#define IOEVENT_LEVEL 60 -#define EVENT_LEVEL 50 -#define CREATION_LEVEL 20 - -#define TRACE DLVL(TRACE_LEVEL) -#define CORRECTNESS DLVL(CORRECTNESS_LEVEL) -#define IOEVENT DLVL(IOEVENT_LEVEL) -#define EVENT DLVL(EVENT_LEVEL) -#define CREATION DLVL(CREATION_LEVEL) - -typedef isc_event_t intev_t; - -#define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o') -#define VALID_SOCKET(s) ISC_MAGIC_VALID(s, SOCKET_MAGIC) - -/*! - * IPv6 control information. If the socket is an IPv6 socket we want - * to collect the destination address and interface so the client can - * set them on outgoing packets. - */ -#ifndef USE_CMSG -#define USE_CMSG 1 -#endif /* ifndef USE_CMSG */ - -/*% - * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have - * a setsockopt() like interface to request timestamps, and if the OS - * doesn't do it for us, call gettimeofday() on every UDP receive? - */ -#ifdef SO_TIMESTAMP -#ifndef USE_CMSG -#define USE_CMSG 1 -#endif /* ifndef USE_CMSG */ -#endif /* ifdef SO_TIMESTAMP */ - -#if defined(SO_RCVBUF) && defined(ISC_RECV_BUFFER_SIZE) -#define SET_RCVBUF -#endif - -#if defined(SO_SNDBUF) && defined(ISC_SEND_BUFFER_SIZE) -#define SET_SNDBUF -#endif - -/*% - * Instead of calculating the cmsgbuf lengths every time we take - * a rule of thumb approach - sizes are taken from x86_64 linux, - * multiplied by 2, everything should fit. Those sizes are not - * large enough to cause any concern. 
- */ -#if defined(USE_CMSG) -#define CMSG_SP_IN6PKT 40 -#else /* if defined(USE_CMSG) */ -#define CMSG_SP_IN6PKT 0 -#endif /* if defined(USE_CMSG) */ - -#if defined(USE_CMSG) && defined(SO_TIMESTAMP) -#define CMSG_SP_TIMESTAMP 32 -#else /* if defined(USE_CMSG) && defined(SO_TIMESTAMP) */ -#define CMSG_SP_TIMESTAMP 0 -#endif /* if defined(USE_CMSG) && defined(SO_TIMESTAMP) */ - -#if defined(USE_CMSG) && (defined(IPV6_TCLASS) || defined(IP_TOS)) -#define CMSG_SP_TCTOS 24 -#else /* if defined(USE_CMSG) && (defined(IPV6_TCLASS) || defined(IP_TOS)) */ -#define CMSG_SP_TCTOS 0 -#endif /* if defined(USE_CMSG) && (defined(IPV6_TCLASS) || defined(IP_TOS)) */ - -#define CMSG_SP_INT 24 - -/* Align cmsg buffers to be safe on SPARC etc. */ -#define RECVCMSGBUFLEN \ - ISC_ALIGN(2 * (CMSG_SP_IN6PKT + CMSG_SP_TIMESTAMP + CMSG_SP_TCTOS) + \ - 1, \ - sizeof(void *)) -#define SENDCMSGBUFLEN \ - ISC_ALIGN(2 * (CMSG_SP_IN6PKT + CMSG_SP_INT + CMSG_SP_TCTOS) + 1, \ - sizeof(void *)) - -/*% - * The number of times a send operation is repeated if the result is EINTR. - */ -#define NRETRIES 10 - -typedef struct isc__socketthread isc__socketthread_t; - -#define NEWCONNSOCK(ev) ((ev)->newsocket) - -struct isc_socket { - /* Not locked. */ - unsigned int magic; - isc_socketmgr_t *manager; - isc_mutex_t lock; - isc_sockettype_t type; - const isc_statscounter_t *statsindex; - isc_refcount_t references; - - /* Locked by socket lock. 
*/ - ISC_LINK(isc_socket_t) link; - int fd; - int pf; - int threadid; - char name[16]; - void *tag; - - ISC_LIST(isc_socketevent_t) send_list; - ISC_LIST(isc_socketevent_t) recv_list; - ISC_LIST(isc_socket_newconnev_t) accept_list; - ISC_LIST(isc_socket_connev_t) connect_list; - - isc_sockaddr_t peer_address; /* remote address */ - - unsigned int listener : 1, /* listener socket */ - connected : 1, connecting : 1, /* connect pending - * */ - bound : 1, /* bound to local addr */ - active : 1, /* currently active */ - pktdscp : 1; /* per packet dscp */ - -#ifdef ISC_PLATFORM_RECVOVERFLOW - unsigned char overflow; /* used for MSG_TRUNC fake */ -#endif /* ifdef ISC_PLATFORM_RECVOVERFLOW */ - - unsigned int dscp; -}; - -#define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g') -#define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC) - -struct isc_socketmgr { - /* Not locked. */ - unsigned int magic; - isc_mem_t *mctx; - isc_mutex_t lock; - isc_stats_t *stats; - int nthreads; - isc__socketthread_t *threads; - unsigned int maxsocks; - /* Locked by manager lock. */ - ISC_LIST(isc_socket_t) socklist; - int reserved; /* unlocked */ - isc_condition_t shutdown_ok; - size_t maxudp; -}; - -struct isc__socketthread { - isc_socketmgr_t *manager; - int threadid; - isc_thread_t thread; - int pipe_fds[2]; - isc_mutex_t *fdlock; - /* Locked by fdlock. 
*/ - isc_socket_t **fds; - int *fdstate; -#ifdef USE_KQUEUE - int kqueue_fd; - int nevents; - struct kevent *events; -#endif /* USE_KQUEUE */ -#ifdef USE_EPOLL - int epoll_fd; - int nevents; - struct epoll_event *events; - uint32_t *epoll_events; -#endif /* USE_EPOLL */ -#ifdef USE_DEVPOLL - int devpoll_fd; - isc_resourcevalue_t open_max; - unsigned int calls; - int nevents; - struct pollfd *events; - pollinfo_t *fdpollinfo; -#endif /* USE_DEVPOLL */ -#ifdef USE_SELECT - int fd_bufsize; - fd_set *read_fds; - fd_set *read_fds_copy; - fd_set *write_fds; - fd_set *write_fds_copy; - int maxfd; -#endif /* USE_SELECT */ -}; - -#define CLOSED 0 /* this one must be zero */ -#define MANAGED 1 -#define CLOSE_PENDING 2 - -/* - * send() and recv() iovec counts - */ -#define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER) -#ifdef ISC_PLATFORM_RECVOVERFLOW -#define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER + 1) -#else /* ifdef ISC_PLATFORM_RECVOVERFLOW */ -#define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER) -#endif /* ifdef ISC_PLATFORM_RECVOVERFLOW */ - -static isc_result_t -socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, - isc_socket_t **socketp); -static void -send_recvdone_event(isc_socket_t *, isc_socketevent_t **); -static void -send_senddone_event(isc_socket_t *, isc_socketevent_t **); -static void -send_connectdone_event(isc_socket_t *, isc_socket_connev_t **); -static void -free_socket(isc_socket_t **); -static isc_result_t -allocate_socket(isc_socketmgr_t *, isc_sockettype_t, isc_socket_t **); -static void -destroy(isc_socket_t **); -static void -internal_accept(isc_socket_t *); -static void -internal_connect(isc_socket_t *); -static void -internal_recv(isc_socket_t *); -static void -internal_send(isc_socket_t *); -static void -process_cmsg(isc_socket_t *, struct msghdr *, isc_socketevent_t *); -static void -build_msghdr_send(isc_socket_t *, char *, isc_socketevent_t *, struct msghdr *, - struct iovec *, size_t *); -static 
void -build_msghdr_recv(isc_socket_t *, char *, isc_socketevent_t *, struct msghdr *, - struct iovec *, size_t *); -static bool -process_ctlfd(isc__socketthread_t *thread); -static void -setdscp(isc_socket_t *sock, isc_dscp_t dscp); - -#define SELECT_POKE_SHUTDOWN (-1) -#define SELECT_POKE_NOTHING (-2) -#define SELECT_POKE_READ (-3) -#define SELECT_POKE_ACCEPT (-3) /*%< Same as _READ */ -#define SELECT_POKE_WRITE (-4) -#define SELECT_POKE_CONNECT (-4) /*%< Same as _WRITE */ -#define SELECT_POKE_CLOSE (-5) - -/*% - * Shortcut index arrays to get access to statistics counters. - */ -enum { - STATID_OPEN = 0, - STATID_OPENFAIL = 1, - STATID_CLOSE = 2, - STATID_BINDFAIL = 3, - STATID_CONNECTFAIL = 4, - STATID_CONNECT = 5, - STATID_ACCEPTFAIL = 6, - STATID_ACCEPT = 7, - STATID_SENDFAIL = 8, - STATID_RECVFAIL = 9, - STATID_ACTIVE = 10 -}; -static const isc_statscounter_t udp4statsindex[] = { - isc_sockstatscounter_udp4open, - isc_sockstatscounter_udp4openfail, - isc_sockstatscounter_udp4close, - isc_sockstatscounter_udp4bindfail, - isc_sockstatscounter_udp4connectfail, - isc_sockstatscounter_udp4connect, - -1, - -1, - isc_sockstatscounter_udp4sendfail, - isc_sockstatscounter_udp4recvfail, - isc_sockstatscounter_udp4active -}; -static const isc_statscounter_t udp6statsindex[] = { - isc_sockstatscounter_udp6open, - isc_sockstatscounter_udp6openfail, - isc_sockstatscounter_udp6close, - isc_sockstatscounter_udp6bindfail, - isc_sockstatscounter_udp6connectfail, - isc_sockstatscounter_udp6connect, - -1, - -1, - isc_sockstatscounter_udp6sendfail, - isc_sockstatscounter_udp6recvfail, - isc_sockstatscounter_udp6active -}; -static const isc_statscounter_t tcp4statsindex[] = { - isc_sockstatscounter_tcp4open, isc_sockstatscounter_tcp4openfail, - isc_sockstatscounter_tcp4close, isc_sockstatscounter_tcp4bindfail, - isc_sockstatscounter_tcp4connectfail, isc_sockstatscounter_tcp4connect, - isc_sockstatscounter_tcp4acceptfail, isc_sockstatscounter_tcp4accept, - 
isc_sockstatscounter_tcp4sendfail, isc_sockstatscounter_tcp4recvfail, - isc_sockstatscounter_tcp4active -}; -static const isc_statscounter_t tcp6statsindex[] = { - isc_sockstatscounter_tcp6open, isc_sockstatscounter_tcp6openfail, - isc_sockstatscounter_tcp6close, isc_sockstatscounter_tcp6bindfail, - isc_sockstatscounter_tcp6connectfail, isc_sockstatscounter_tcp6connect, - isc_sockstatscounter_tcp6acceptfail, isc_sockstatscounter_tcp6accept, - isc_sockstatscounter_tcp6sendfail, isc_sockstatscounter_tcp6recvfail, - isc_sockstatscounter_tcp6active -}; -static const isc_statscounter_t unixstatsindex[] = { - isc_sockstatscounter_unixopen, isc_sockstatscounter_unixopenfail, - isc_sockstatscounter_unixclose, isc_sockstatscounter_unixbindfail, - isc_sockstatscounter_unixconnectfail, isc_sockstatscounter_unixconnect, - isc_sockstatscounter_unixacceptfail, isc_sockstatscounter_unixaccept, - isc_sockstatscounter_unixsendfail, isc_sockstatscounter_unixrecvfail, - isc_sockstatscounter_unixactive -}; -static const isc_statscounter_t rawstatsindex[] = { - isc_sockstatscounter_rawopen, - isc_sockstatscounter_rawopenfail, - isc_sockstatscounter_rawclose, - -1, - -1, - -1, - -1, - -1, - -1, - isc_sockstatscounter_rawrecvfail, - isc_sockstatscounter_rawactive -}; - -static int -gen_threadid(isc_socket_t *sock); - -static int -gen_threadid(isc_socket_t *sock) { - return (sock->fd % sock->manager->nthreads); -} - -static void -manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category, - isc_logmodule_t *module, int level, const char *fmt, ...) - ISC_FORMAT_PRINTF(5, 6); -static void -manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category, - isc_logmodule_t *module, int level, const char *fmt, ...) 
{ - char msgbuf[2048]; - va_list ap; - - if (!isc_log_wouldlog(isc_lctx, level)) { - return; - } - - va_start(ap, fmt); - vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); - va_end(ap); - - isc_log_write(isc_lctx, category, module, level, "sockmgr %p: %s", - sockmgr, msgbuf); -} - -static void -thread_log(isc__socketthread_t *thread, isc_logcategory_t *category, - isc_logmodule_t *module, int level, const char *fmt, ...) - ISC_FORMAT_PRINTF(5, 6); -static void -thread_log(isc__socketthread_t *thread, isc_logcategory_t *category, - isc_logmodule_t *module, int level, const char *fmt, ...) { - char msgbuf[2048]; - va_list ap; - - if (!isc_log_wouldlog(isc_lctx, level)) { - return; - } - - va_start(ap, fmt); - vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); - va_end(ap); - - isc_log_write(isc_lctx, category, module, level, - "sockmgr %p thread %d: %s", thread->manager, - thread->threadid, msgbuf); -} - -static void -socket_log(isc_socket_t *sock, const isc_sockaddr_t *address, - isc_logcategory_t *category, isc_logmodule_t *module, int level, - const char *fmt, ...) ISC_FORMAT_PRINTF(6, 7); -static void -socket_log(isc_socket_t *sock, const isc_sockaddr_t *address, - isc_logcategory_t *category, isc_logmodule_t *module, int level, - const char *fmt, ...) { - char msgbuf[2048]; - char peerbuf[ISC_SOCKADDR_FORMATSIZE]; - va_list ap; - - if (!isc_log_wouldlog(isc_lctx, level)) { - return; - } - - va_start(ap, fmt); - vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); - va_end(ap); - - if (address == NULL) { - isc_log_write(isc_lctx, category, module, level, - "socket %p: %s", sock, msgbuf); - } else { - isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); - isc_log_write(isc_lctx, category, module, level, - "socket %p %s: %s", sock, peerbuf, msgbuf); - } -} - -/*% - * Increment socket-related statistics counters. 
- */ -static inline void -inc_stats(isc_stats_t *stats, isc_statscounter_t counterid) { - REQUIRE(counterid != -1); - - if (stats != NULL) { - isc_stats_increment(stats, counterid); - } -} - -/*% - * Decrement socket-related statistics counters. - */ -static inline void -dec_stats(isc_stats_t *stats, isc_statscounter_t counterid) { - REQUIRE(counterid != -1); - - if (stats != NULL) { - isc_stats_decrement(stats, counterid); - } -} - -static inline isc_result_t -watch_fd(isc__socketthread_t *thread, int fd, int msg) { - isc_result_t result = ISC_R_SUCCESS; - -#ifdef USE_KQUEUE - struct kevent evchange; - - memset(&evchange, 0, sizeof(evchange)); - if (msg == SELECT_POKE_READ) { - evchange.filter = EVFILT_READ; - } else { - evchange.filter = EVFILT_WRITE; - } - evchange.flags = EV_ADD; - evchange.ident = fd; - if (kevent(thread->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) { - result = isc__errno2result(errno); - } - - return (result); -#elif defined(USE_EPOLL) - struct epoll_event event; - uint32_t oldevents; - int ret; - int op; - - oldevents = thread->epoll_events[fd]; - if (msg == SELECT_POKE_READ) { - thread->epoll_events[fd] |= EPOLLIN; - } else { - thread->epoll_events[fd] |= EPOLLOUT; - } - - event.events = thread->epoll_events[fd]; - memset(&event.data, 0, sizeof(event.data)); - event.data.fd = fd; - - op = (oldevents == 0U) ? 
EPOLL_CTL_ADD : EPOLL_CTL_MOD; - if (thread->fds[fd] != NULL) { - LOCK(&thread->fds[fd]->lock); - } - ret = epoll_ctl(thread->epoll_fd, op, fd, &event); - if (thread->fds[fd] != NULL) { - UNLOCK(&thread->fds[fd]->lock); - } - if (ret == -1) { - if (errno == EEXIST) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "epoll_ctl(ADD/MOD) returned " - "EEXIST for fd %d", - fd); - } - result = isc__errno2result(errno); - } - - return (result); -#elif defined(USE_DEVPOLL) - struct pollfd pfd; - - memset(&pfd, 0, sizeof(pfd)); - if (msg == SELECT_POKE_READ) { - pfd.events = POLLIN; - } else { - pfd.events = POLLOUT; - } - pfd.fd = fd; - pfd.revents = 0; - if (write(thread->devpoll_fd, &pfd, sizeof(pfd)) == -1) { - result = isc__errno2result(errno); - } else { - if (msg == SELECT_POKE_READ) { - thread->fdpollinfo[fd].want_read = 1; - } else { - thread->fdpollinfo[fd].want_write = 1; - } - } - - return (result); -#elif defined(USE_SELECT) - LOCK(&thread->manager->lock); - if (msg == SELECT_POKE_READ) { - FD_SET(fd, thread->read_fds); - } - if (msg == SELECT_POKE_WRITE) { - FD_SET(fd, thread->write_fds); - } - UNLOCK(&thread->manager->lock); - - return (result); -#endif /* ifdef USE_KQUEUE */ -} - -static inline isc_result_t -unwatch_fd(isc__socketthread_t *thread, int fd, int msg) { - isc_result_t result = ISC_R_SUCCESS; - -#ifdef USE_KQUEUE - struct kevent evchange; - - memset(&evchange, 0, sizeof(evchange)); - if (msg == SELECT_POKE_READ) { - evchange.filter = EVFILT_READ; - } else { - evchange.filter = EVFILT_WRITE; - } - evchange.flags = EV_DELETE; - evchange.ident = fd; - if (kevent(thread->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) { - result = isc__errno2result(errno); - } - - return (result); -#elif defined(USE_EPOLL) - struct epoll_event event; - int ret; - int op; - - if (msg == SELECT_POKE_READ) { - thread->epoll_events[fd] &= ~(EPOLLIN); - } else { - thread->epoll_events[fd] &= ~(EPOLLOUT); - } - - event.events = thread->epoll_events[fd]; - memset(&event.data, 0, 
sizeof(event.data)); - event.data.fd = fd; - - op = (event.events == 0U) ? EPOLL_CTL_DEL : EPOLL_CTL_MOD; - ret = epoll_ctl(thread->epoll_fd, op, fd, &event); - if (ret == -1 && errno != ENOENT) { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "epoll_ctl(DEL), %d: %s", - fd, strbuf); - result = ISC_R_UNEXPECTED; - } - return (result); -#elif defined(USE_DEVPOLL) - struct pollfd pfds[2]; - size_t writelen = sizeof(pfds[0]); - - memset(pfds, 0, sizeof(pfds)); - pfds[0].events = POLLREMOVE; - pfds[0].fd = fd; - - /* - * Canceling read or write polling via /dev/poll is tricky. Since it - * only provides a way of canceling per FD, we may need to re-poll the - * socket for the other operation. - */ - if (msg == SELECT_POKE_READ && thread->fdpollinfo[fd].want_write == 1) { - pfds[1].events = POLLOUT; - pfds[1].fd = fd; - writelen += sizeof(pfds[1]); - } - if (msg == SELECT_POKE_WRITE && thread->fdpollinfo[fd].want_read == 1) { - pfds[1].events = POLLIN; - pfds[1].fd = fd; - writelen += sizeof(pfds[1]); - } - - if (write(thread->devpoll_fd, pfds, writelen) == -1) { - result = isc__errno2result(errno); - } else { - if (msg == SELECT_POKE_READ) { - thread->fdpollinfo[fd].want_read = 0; - } else { - thread->fdpollinfo[fd].want_write = 0; - } - } - - return (result); -#elif defined(USE_SELECT) - LOCK(&thread->manager->lock); - if (msg == SELECT_POKE_READ) { - FD_CLR(fd, thread->read_fds); - } else if (msg == SELECT_POKE_WRITE) { - FD_CLR(fd, thread->write_fds); - } - UNLOCK(&thread->manager->lock); - - return (result); -#endif /* ifdef USE_KQUEUE */ -} - -/* - * A poke message was received, perform a proper watch/unwatch - * on a fd provided - */ -static void -wakeup_socket(isc__socketthread_t *thread, int fd, int msg) { - isc_result_t result; - int lockid = FDLOCK_ID(fd); - - /* - * This is a wakeup on a socket. 
If the socket is not in the - * process of being closed, start watching it for either reads - * or writes. - */ - - INSIST(fd >= 0 && fd < (int)thread->manager->maxsocks); - - if (msg == SELECT_POKE_CLOSE) { - LOCK(&thread->fdlock[lockid]); - INSIST(thread->fdstate[fd] == CLOSE_PENDING); - thread->fdstate[fd] = CLOSED; - (void)unwatch_fd(thread, fd, SELECT_POKE_READ); - (void)unwatch_fd(thread, fd, SELECT_POKE_WRITE); - (void)close(fd); - UNLOCK(&thread->fdlock[lockid]); - return; - } - - LOCK(&thread->fdlock[lockid]); - if (thread->fdstate[fd] == CLOSE_PENDING) { - /* - * We accept (and ignore) any error from unwatch_fd() as we are - * closing the socket, hoping it doesn't leave dangling state in - * the kernel. - * Note that unwatch_fd() must be called after releasing the - * fdlock; otherwise it could cause deadlock due to a lock order - * reversal. - */ - (void)unwatch_fd(thread, fd, SELECT_POKE_READ); - (void)unwatch_fd(thread, fd, SELECT_POKE_WRITE); - UNLOCK(&thread->fdlock[lockid]); - return; - } - if (thread->fdstate[fd] != MANAGED) { - UNLOCK(&thread->fdlock[lockid]); - return; - } - - /* - * Set requested bit. - */ - result = watch_fd(thread, fd, msg); - if (result != ISC_R_SUCCESS) { - /* - * XXXJT: what should we do? Ignoring the failure of watching - * a socket will make the application dysfunctional, but there - * seems to be no reasonable recovery process. - */ - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "failed to start watching FD (%d): %s", fd, - isc_result_totext(result)); - } - UNLOCK(&thread->fdlock[lockid]); -} - -/* - * Poke the select loop when there is something for us to do. - * The write is required (by POSIX) to complete. That is, we - * will not get partial writes. 
- */ -static void -select_poke(isc_socketmgr_t *mgr, int threadid, int fd, int msg) { - int cc; - int buf[2]; - char strbuf[ISC_STRERRORSIZE]; - - buf[0] = fd; - buf[1] = msg; - - do { - cc = write(mgr->threads[threadid].pipe_fds[1], buf, - sizeof(buf)); -#ifdef ENOSR - /* - * Treat ENOSR as EAGAIN but loop slowly as it is - * unlikely to clear fast. - */ - if (cc < 0 && errno == ENOSR) { - sleep(1); - errno = EAGAIN; - } -#endif /* ifdef ENOSR */ - } while (cc < 0 && SOFT_ERROR(errno)); - - if (cc < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, - "write() failed during watcher poke: %s", strbuf); - } - - INSIST(cc == sizeof(buf)); -} - -/* - * Read a message on the internal fd. - */ -static void -select_readmsg(isc__socketthread_t *thread, int *fd, int *msg) { - int buf[2]; - int cc; - char strbuf[ISC_STRERRORSIZE]; - - cc = read(thread->pipe_fds[0], buf, sizeof(buf)); - if (cc < 0) { - *msg = SELECT_POKE_NOTHING; - *fd = -1; /* Silence compiler. */ - if (SOFT_ERROR(errno)) { - return; - } - - strerror_r(errno, strbuf, sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, - "read() failed during watcher poke: %s", strbuf); - } - INSIST(cc == sizeof(buf)); - - *fd = buf[0]; - *msg = buf[1]; -} - -/* - * Make a fd non-blocking. 
- */ -static isc_result_t -make_nonblock(int fd) { - int ret; - char strbuf[ISC_STRERRORSIZE]; -#ifdef USE_FIONBIO_IOCTL - int on = 1; -#else /* ifdef USE_FIONBIO_IOCTL */ - int flags; -#endif /* ifdef USE_FIONBIO_IOCTL */ - -#ifdef USE_FIONBIO_IOCTL - ret = ioctl(fd, FIONBIO, (char *)&on); -#else /* ifdef USE_FIONBIO_IOCTL */ - flags = fcntl(fd, F_GETFL, 0); - flags |= O_NONBLOCK; - ret = fcntl(fd, F_SETFL, flags); -#endif /* ifdef USE_FIONBIO_IOCTL */ - - if (ret == -1) { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, -#ifdef USE_FIONBIO_IOCTL - "ioctl(%d, FIONBIO, &on): %s", fd, -#else /* ifdef USE_FIONBIO_IOCTL */ - "fcntl(%d, F_SETFL, %d): %s", fd, flags, -#endif /* ifdef USE_FIONBIO_IOCTL */ - strbuf); - - return (ISC_R_UNEXPECTED); - } - - return (ISC_R_SUCCESS); -} - -#ifdef USE_CMSG -/* - * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE. - * In order to ensure as much portability as possible, we provide wrapper - * functions of these macros. - * Note that cmsg_space() could run slow on OSes that do not have - * CMSG_SPACE. - */ -static inline socklen_t -cmsg_len(socklen_t len) { -#ifdef CMSG_LEN - return (CMSG_LEN(len)); -#else /* ifdef CMSG_LEN */ - socklen_t hdrlen; - - /* - * Cast NULL so that any pointer arithmetic performed by CMSG_DATA - * is correct. - */ - hdrlen = (socklen_t)CMSG_DATA(((struct cmsghdr *)NULL)); - return (hdrlen + len); -#endif /* ifdef CMSG_LEN */ -} - -static inline socklen_t -cmsg_space(socklen_t len) { -#ifdef CMSG_SPACE - return (CMSG_SPACE(len)); -#else /* ifdef CMSG_SPACE */ - struct msghdr msg; - struct cmsghdr *cmsgp; - /* - * XXX: The buffer length is an ad-hoc value, but should be enough - * in a practical sense. 
- */ - char dummybuf[sizeof(struct cmsghdr) + 1024]; - - memset(&msg, 0, sizeof(msg)); - msg.msg_control = dummybuf; - msg.msg_controllen = sizeof(dummybuf); - - cmsgp = (struct cmsghdr *)dummybuf; - cmsgp->cmsg_len = cmsg_len(len); - - cmsgp = CMSG_NXTHDR(&msg, cmsgp); - if (cmsgp != NULL) { - return ((char *)cmsgp - (char *)msg.msg_control); - } else { - return (0); - } -#endif /* ifdef CMSG_SPACE */ -} -#endif /* USE_CMSG */ - -/* - * Process control messages received on a socket. - */ -static void -process_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) { -#ifdef USE_CMSG - struct cmsghdr *cmsgp; - struct in6_pktinfo *pktinfop; -#ifdef SO_TIMESTAMP - void *timevalp; -#endif /* ifdef SO_TIMESTAMP */ -#endif /* ifdef USE_CMSG */ - - /* - * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined. - * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined. - * They are all here, outside of the CPP tests, because it is - * more consistent with the usual ISC coding style. 
- */ - UNUSED(sock); - UNUSED(msg); - UNUSED(dev); - -#ifdef MSG_TRUNC - if ((msg->msg_flags & MSG_TRUNC) != 0) { - dev->attributes |= ISC_SOCKEVENTATTR_TRUNC; - } -#endif /* ifdef MSG_TRUNC */ - -#ifdef MSG_CTRUNC - if ((msg->msg_flags & MSG_CTRUNC) != 0) { - dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC; - } -#endif /* ifdef MSG_CTRUNC */ - -#ifndef USE_CMSG - return; -#else /* ifndef USE_CMSG */ - if (msg->msg_controllen == 0U || msg->msg_control == NULL) { - return; - } - -#ifdef SO_TIMESTAMP - timevalp = NULL; -#endif /* ifdef SO_TIMESTAMP */ - pktinfop = NULL; - - cmsgp = CMSG_FIRSTHDR(msg); - while (cmsgp != NULL) { - socket_log(sock, NULL, TRACE, "processing cmsg %p", cmsgp); - - if (cmsgp->cmsg_level == IPPROTO_IPV6 && - cmsgp->cmsg_type == IPV6_PKTINFO) { - pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); - memmove(&dev->pktinfo, pktinfop, - sizeof(struct in6_pktinfo)); - dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; - socket_log(sock, NULL, TRACE, - "interface received on ifindex %u", - dev->pktinfo.ipi6_ifindex); - if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr)) { - dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST; - } - goto next; - } - -#ifdef SO_TIMESTAMP - if (cmsgp->cmsg_level == SOL_SOCKET && - cmsgp->cmsg_type == SCM_TIMESTAMP) { - struct timeval tv; - timevalp = CMSG_DATA(cmsgp); - memmove(&tv, timevalp, sizeof(tv)); - dev->timestamp.seconds = tv.tv_sec; - dev->timestamp.nanoseconds = tv.tv_usec * 1000; - dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP; - goto next; - } -#endif /* ifdef SO_TIMESTAMP */ - -#ifdef IPV6_TCLASS - if (cmsgp->cmsg_level == IPPROTO_IPV6 && - cmsgp->cmsg_type == IPV6_TCLASS) { - dev->dscp = *(int *)CMSG_DATA(cmsgp); - dev->dscp >>= 2; - dev->attributes |= ISC_SOCKEVENTATTR_DSCP; - goto next; - } -#endif /* ifdef IPV6_TCLASS */ - -#ifdef IP_TOS - if (cmsgp->cmsg_level == IPPROTO_IP && - (cmsgp->cmsg_type == IP_TOS -#ifdef IP_RECVTOS - || cmsgp->cmsg_type == IP_RECVTOS -#endif /* ifdef IP_RECVTOS */ - )) - { - dev->dscp = 
(int)*(unsigned char *)CMSG_DATA(cmsgp); - dev->dscp >>= 2; - dev->attributes |= ISC_SOCKEVENTATTR_DSCP; - goto next; - } -#endif /* ifdef IP_TOS */ - next: - cmsgp = CMSG_NXTHDR(msg, cmsgp); - } -#endif /* USE_CMSG */ -} - -/* - * Construct an iov array and attach it to the msghdr passed in. This is - * the SEND constructor, which will use the used region of the buffer - * (if using a buffer list) or will use the internal region (if a single - * buffer I/O is requested). - * - * Nothing can be NULL, and the done event must list at least one buffer - * on the buffer linked list for this function to be meaningful. - * - * If write_countp != NULL, *write_countp will hold the number of bytes - * this transaction can send. - */ -static void -build_msghdr_send(isc_socket_t *sock, char *cmsgbuf, isc_socketevent_t *dev, - struct msghdr *msg, struct iovec *iov, size_t *write_countp) { - unsigned int iovcount; - size_t write_count; - struct cmsghdr *cmsgp; - - memset(msg, 0, sizeof(*msg)); - - if (!sock->connected) { - msg->msg_name = (void *)&dev->address.type.sa; - msg->msg_namelen = dev->address.length; - } else { - msg->msg_name = NULL; - msg->msg_namelen = 0; - } - - write_count = dev->region.length - dev->n; - iov[0].iov_base = (void *)(dev->region.base + dev->n); - iov[0].iov_len = write_count; - iovcount = 1; - - msg->msg_iov = iov; - msg->msg_iovlen = iovcount; - msg->msg_control = NULL; - msg->msg_controllen = 0; - msg->msg_flags = 0; -#if defined(USE_CMSG) - - if ((sock->type == isc_sockettype_udp) && - ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0)) - { - struct in6_pktinfo *pktinfop; - - socket_log(sock, NULL, TRACE, "sendto pktinfo data, ifindex %u", - dev->pktinfo.ipi6_ifindex); - - msg->msg_control = (void *)cmsgbuf; - msg->msg_controllen = cmsg_space(sizeof(struct in6_pktinfo)); - INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); - - cmsgp = (struct cmsghdr *)cmsgbuf; - cmsgp->cmsg_level = IPPROTO_IPV6; - cmsgp->cmsg_type = IPV6_PKTINFO; - 
cmsgp->cmsg_len = cmsg_len(sizeof(struct in6_pktinfo)); - pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); - memmove(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo)); - } - -#if defined(IPV6_USE_MIN_MTU) - if ((sock->type == isc_sockettype_udp) && (sock->pf == AF_INET6) && - ((dev->attributes & ISC_SOCKEVENTATTR_USEMINMTU) != 0)) - { - int use_min_mtu = 1; /* -1, 0, 1 */ - - cmsgp = (struct cmsghdr *)(cmsgbuf + msg->msg_controllen); - msg->msg_control = (void *)cmsgbuf; - msg->msg_controllen += cmsg_space(sizeof(use_min_mtu)); - INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); - - cmsgp->cmsg_level = IPPROTO_IPV6; - cmsgp->cmsg_type = IPV6_USE_MIN_MTU; - cmsgp->cmsg_len = cmsg_len(sizeof(use_min_mtu)); - memmove(CMSG_DATA(cmsgp), &use_min_mtu, sizeof(use_min_mtu)); - } -#endif /* if defined(IPV6_USE_MIN_MTU) */ - - if (isc_dscp_check_value > -1) { - if (sock->type == isc_sockettype_udp) { - INSIST((int)dev->dscp == isc_dscp_check_value); - } else if (sock->type == isc_sockettype_tcp) { - INSIST((int)sock->dscp == isc_dscp_check_value); - } - } - -#if defined(IP_TOS) || (defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)) - if ((sock->type == isc_sockettype_udp) && - ((dev->attributes & ISC_SOCKEVENTATTR_DSCP) != 0)) - { - int dscp = (dev->dscp << 2) & 0xff; - - INSIST(dev->dscp < 0x40); - -#ifdef IP_TOS - if (sock->pf == AF_INET && sock->pktdscp) { - cmsgp = (struct cmsghdr *)(cmsgbuf + - msg->msg_controllen); - msg->msg_control = (void *)cmsgbuf; - msg->msg_controllen += cmsg_space(sizeof(dscp)); - INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); - - cmsgp->cmsg_level = IPPROTO_IP; - cmsgp->cmsg_type = IP_TOS; - cmsgp->cmsg_len = cmsg_len(sizeof(char)); - *(unsigned char *)CMSG_DATA(cmsgp) = dscp; - } else if (sock->pf == AF_INET && sock->dscp != dev->dscp) { - if (setsockopt(sock->fd, IPPROTO_IP, IP_TOS, - (void *)&dscp, sizeof(int)) < 0) { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - 
"setsockopt(%d, IP_TOS, %.02x)" - " failed: %s", - sock->fd, dscp >> 2, strbuf); - } else { - sock->dscp = dscp; - } - } -#endif /* ifdef IP_TOS */ -#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS) - if (sock->pf == AF_INET6 && sock->pktdscp) { - cmsgp = (struct cmsghdr *)(cmsgbuf + - msg->msg_controllen); - msg->msg_control = (void *)cmsgbuf; - msg->msg_controllen += cmsg_space(sizeof(dscp)); - INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); - - cmsgp->cmsg_level = IPPROTO_IPV6; - cmsgp->cmsg_type = IPV6_TCLASS; - cmsgp->cmsg_len = cmsg_len(sizeof(dscp)); - memmove(CMSG_DATA(cmsgp), &dscp, sizeof(dscp)); - } else if (sock->pf == AF_INET6 && sock->dscp != dev->dscp) { - if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS, - (void *)&dscp, sizeof(int)) < 0) - { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_TCLASS, " - "%.02x) failed: %s", - sock->fd, dscp >> 2, strbuf); - } else { - sock->dscp = dscp; - } - } -#endif /* if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS) */ - if (msg->msg_controllen != 0 && - msg->msg_controllen < SENDCMSGBUFLEN) { - memset(cmsgbuf + msg->msg_controllen, 0, - SENDCMSGBUFLEN - msg->msg_controllen); - } - } -#endif /* if defined(IP_TOS) || (defined(IPPROTO_IPV6) && \ - * defined(IPV6_TCLASS)) \ - * */ -#endif /* USE_CMSG */ - - if (write_countp != NULL) { - *write_countp = write_count; - } -} - -/* - * Construct an iov array and attach it to the msghdr passed in. This is - * the RECV constructor, which will use the available region of the buffer - * (if using a buffer list) or will use the internal region (if a single - * buffer I/O is requested). - * - * Nothing can be NULL, and the done event must list at least one buffer - * on the buffer linked list for this function to be meaningful. - * - * If read_countp != NULL, *read_countp will hold the number of bytes - * this transaction can receive. 
- */ -static void -build_msghdr_recv(isc_socket_t *sock, char *cmsgbuf, isc_socketevent_t *dev, - struct msghdr *msg, struct iovec *iov, size_t *read_countp) { - unsigned int iovcount; - size_t read_count; - - memset(msg, 0, sizeof(struct msghdr)); - - if (sock->type == isc_sockettype_udp) { - memset(&dev->address, 0, sizeof(dev->address)); - msg->msg_name = (void *)&dev->address.type.sa; - msg->msg_namelen = sizeof(dev->address.type); - } else { /* TCP */ - msg->msg_name = NULL; - msg->msg_namelen = 0; - dev->address = sock->peer_address; - } - - read_count = dev->region.length - dev->n; - iov[0].iov_base = (void *)(dev->region.base + dev->n); - iov[0].iov_len = read_count; - iovcount = 1; - - /* - * If needed, set up to receive that one extra byte. - */ -#ifdef ISC_PLATFORM_RECVOVERFLOW - if (sock->type == isc_sockettype_udp) { - INSIST(iovcount < MAXSCATTERGATHER_RECV); - iov[iovcount].iov_base = (void *)(&sock->overflow); - iov[iovcount].iov_len = 1; - iovcount++; - } -#endif /* ifdef ISC_PLATFORM_RECVOVERFLOW */ - - msg->msg_iov = iov; - msg->msg_iovlen = iovcount; - -#if defined(USE_CMSG) - msg->msg_control = cmsgbuf; - msg->msg_controllen = RECVCMSGBUFLEN; -#else /* if defined(USE_CMSG) */ - msg->msg_control = NULL; - msg->msg_controllen = 0; -#endif /* USE_CMSG */ - msg->msg_flags = 0; - - if (read_countp != NULL) { - *read_countp = read_count; - } -} - -static void -set_dev_address(const isc_sockaddr_t *address, isc_socket_t *sock, - isc_socketevent_t *dev) { - if (sock->type == isc_sockettype_udp) { - if (address != NULL) { - dev->address = *address; - } else { - dev->address = sock->peer_address; - } - } else if (sock->type == isc_sockettype_tcp) { - INSIST(address == NULL); - dev->address = sock->peer_address; - } -} - -static void -destroy_socketevent(isc_event_t *event) { - isc_socketevent_t *ev = (isc_socketevent_t *)event; - - (ev->destroy)(event); -} - -static isc_socketevent_t * -allocate_socketevent(isc_mem_t *mctx, void *sender, isc_eventtype_t 
eventtype, - isc_taskaction_t action, void *arg) { - isc_socketevent_t *ev; - - ev = (isc_socketevent_t *)isc_event_allocate(mctx, sender, eventtype, - action, arg, sizeof(*ev)); - - ev->result = ISC_R_UNSET; - ISC_LINK_INIT(ev, ev_link); - ev->region.base = NULL; - ev->n = 0; - ev->offset = 0; - ev->attributes = 0; - ev->destroy = ev->ev_destroy; - ev->ev_destroy = destroy_socketevent; - ev->dscp = 0; - - return (ev); -} - -#if defined(ISC_SOCKET_DEBUG) -static void -dump_msg(struct msghdr *msg) { - unsigned int i; - - printf("MSGHDR %p\n", msg); - printf("\tname %p, namelen %ld\n", msg->msg_name, - (long)msg->msg_namelen); - printf("\tiov %p, iovlen %ld\n", msg->msg_iov, (long)msg->msg_iovlen); - for (i = 0; i < (unsigned int)msg->msg_iovlen; i++) - printf("\t\t%u\tbase %p, len %ld\n", i, - msg->msg_iov[i].iov_base, (long)msg->msg_iov[i].iov_len); - printf("\tcontrol %p, controllen %ld\n", msg->msg_control, - (long)msg->msg_controllen); -} -#endif /* if defined(ISC_SOCKET_DEBUG) */ - -#define DOIO_SUCCESS 0 /* i/o ok, event sent */ -#define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */ -#define DOIO_HARD 2 /* i/o error, event sent */ -#define DOIO_EOF 3 /* EOF, no event sent */ - -static int -doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) { - int cc; - struct iovec iov[MAXSCATTERGATHER_RECV]; - size_t read_count; - struct msghdr msghdr; - int recv_errno; - char strbuf[ISC_STRERRORSIZE]; - char cmsgbuf[RECVCMSGBUFLEN] = { 0 }; - - build_msghdr_recv(sock, cmsgbuf, dev, &msghdr, iov, &read_count); - -#if defined(ISC_SOCKET_DEBUG) - dump_msg(&msghdr); -#endif /* if defined(ISC_SOCKET_DEBUG) */ - - cc = recvmsg(sock->fd, &msghdr, 0); - recv_errno = errno; - -#if defined(ISC_SOCKET_DEBUG) - dump_msg(&msghdr); -#endif /* if defined(ISC_SOCKET_DEBUG) */ - - if (cc < 0) { - if (SOFT_ERROR(recv_errno)) { - return (DOIO_SOFT); - } - - if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { - strerror_r(recv_errno, strbuf, sizeof(strbuf)); - socket_log(sock, NULL, 
IOEVENT, - "doio_recv: recvmsg(%d) %d bytes, err %d/%s", - sock->fd, cc, recv_errno, strbuf); - } - -#define SOFT_OR_HARD(_system, _isc) \ - if (recv_errno == _system) { \ - if (sock->connected) { \ - dev->result = _isc; \ - inc_stats(sock->manager->stats, \ - sock->statsindex[STATID_RECVFAIL]); \ - return (DOIO_HARD); \ - } \ - return (DOIO_SOFT); \ - } -#define ALWAYS_HARD(_system, _isc) \ - if (recv_errno == _system) { \ - dev->result = _isc; \ - inc_stats(sock->manager->stats, \ - sock->statsindex[STATID_RECVFAIL]); \ - return (DOIO_HARD); \ - } - - SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); - SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH); - SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); - SOFT_OR_HARD(EHOSTDOWN, ISC_R_HOSTDOWN); - SOFT_OR_HARD(ENOBUFS, ISC_R_NORESOURCES); - /* - * Older operating systems may still return EPROTO in some - * situations, for example when receiving ICMP/ICMPv6 errors. - * A real life scenario is when ICMPv6 returns code 5 or 6. - * These codes are introduced in RFC 4443 from March 2006, - * and the document obsoletes RFC 1885. But unfortunately not - * all operating systems have caught up with the new standard - * (in 2020) and thus a generic protocol error is returned. - */ - SOFT_OR_HARD(EPROTO, ISC_R_HOSTUNREACH); - /* Should never get this one but it was seen. */ -#ifdef ENOPROTOOPT - SOFT_OR_HARD(ENOPROTOOPT, ISC_R_HOSTUNREACH); -#endif /* ifdef ENOPROTOOPT */ - SOFT_OR_HARD(EINVAL, ISC_R_HOSTUNREACH); - -#undef SOFT_OR_HARD -#undef ALWAYS_HARD - - dev->result = isc__errno2result(recv_errno); - inc_stats(sock->manager->stats, - sock->statsindex[STATID_RECVFAIL]); - return (DOIO_HARD); - } - - /* - * On TCP and UNIX sockets, zero length reads indicate EOF, - * while on UDP sockets, zero length reads are perfectly valid, - * although strange. 
- */ - switch (sock->type) { - case isc_sockettype_tcp: - case isc_sockettype_unix: - if (cc == 0) { - return (DOIO_EOF); - } - break; - case isc_sockettype_udp: - case isc_sockettype_raw: - break; - default: - INSIST(0); - ISC_UNREACHABLE(); - } - - if (sock->type == isc_sockettype_udp) { - dev->address.length = msghdr.msg_namelen; - if (isc_sockaddr_getport(&dev->address) == 0) { - if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { - socket_log(sock, &dev->address, IOEVENT, - "dropping source port zero packet"); - } - return (DOIO_SOFT); - } - /* - * Simulate a firewall blocking UDP responses bigger than - * 'maxudp' bytes. - */ - if (sock->manager->maxudp != 0 && - cc > (int)sock->manager->maxudp) { - return (DOIO_SOFT); - } - } - - socket_log(sock, &dev->address, IOEVENT, "packet received correctly"); - - /* - * Overflow bit detection. If we received MORE bytes than we should, - * this indicates an overflow situation. Set the flag in the - * dev entry and adjust how much we read by one. - */ -#ifdef ISC_PLATFORM_RECVOVERFLOW - if ((sock->type == isc_sockettype_udp) && ((size_t)cc > read_count)) { - dev->attributes |= ISC_SOCKEVENTATTR_TRUNC; - cc--; - } -#endif /* ifdef ISC_PLATFORM_RECVOVERFLOW */ - - /* - * If there are control messages attached, run through them and pull - * out the interesting bits. - */ - process_cmsg(sock, &msghdr, dev); - - /* - * update the buffers (if any) and the i/o count - */ - dev->n += cc; - - /* - * If we read less than we expected, update counters, - * and let the upper layer poke the descriptor. - */ - if (((size_t)cc != read_count) && (dev->n < dev->minimum)) { - return (DOIO_SOFT); - } - - /* - * Full reads are posted, or partials if partials are ok. - */ - dev->result = ISC_R_SUCCESS; - return (DOIO_SUCCESS); -} - -/* - * Returns: - * DOIO_SUCCESS The operation succeeded. dev->result contains - * ISC_R_SUCCESS. - * - * DOIO_HARD A hard or unexpected I/O error was encountered. - * dev->result contains the appropriate error. 
- * - * DOIO_SOFT A soft I/O error was encountered. No senddone - * event was sent. The operation should be retried. - * - * No other return values are possible. - */ -static int -doio_send(isc_socket_t *sock, isc_socketevent_t *dev) { - int cc; - struct iovec iov[MAXSCATTERGATHER_SEND]; - size_t write_count; - struct msghdr msghdr; - char addrbuf[ISC_SOCKADDR_FORMATSIZE]; - int attempts = 0; - int send_errno; - char strbuf[ISC_STRERRORSIZE]; - char cmsgbuf[SENDCMSGBUFLEN] = { 0 }; - - build_msghdr_send(sock, cmsgbuf, dev, &msghdr, iov, &write_count); - -resend: - if (sock->type == isc_sockettype_udp && sock->manager->maxudp != 0 && - write_count > sock->manager->maxudp) - { - cc = write_count; - } else { - cc = sendmsg(sock->fd, &msghdr, 0); - } - send_errno = errno; - - /* - * Check for error or block condition. - */ - if (cc < 0) { - if (send_errno == EINTR && ++attempts < NRETRIES) { - goto resend; - } - - if (SOFT_ERROR(send_errno)) { - if (errno == EWOULDBLOCK || errno == EAGAIN) { - dev->result = ISC_R_WOULDBLOCK; - } - return (DOIO_SOFT); - } - -#define SOFT_OR_HARD(_system, _isc) \ - if (send_errno == _system) { \ - if (sock->connected) { \ - dev->result = _isc; \ - inc_stats(sock->manager->stats, \ - sock->statsindex[STATID_SENDFAIL]); \ - return (DOIO_HARD); \ - } \ - return (DOIO_SOFT); \ - } -#define ALWAYS_HARD(_system, _isc) \ - if (send_errno == _system) { \ - dev->result = _isc; \ - inc_stats(sock->manager->stats, \ - sock->statsindex[STATID_SENDFAIL]); \ - return (DOIO_HARD); \ - } - - SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); - ALWAYS_HARD(EACCES, ISC_R_NOPERM); - ALWAYS_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); - ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); - ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); -#ifdef EHOSTDOWN - ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH); -#endif /* ifdef EHOSTDOWN */ - ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH); - SOFT_OR_HARD(ENOBUFS, ISC_R_NORESOURCES); - ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH); - 
ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED); - ALWAYS_HARD(ECONNRESET, ISC_R_CONNECTIONRESET); - -#undef SOFT_OR_HARD -#undef ALWAYS_HARD - - /* - * The other error types depend on whether or not the - * socket is UDP or TCP. If it is UDP, some errors - * that we expect to be fatal under TCP are merely - * annoying, and are really soft errors. - * - * However, these soft errors are still returned as - * a status. - */ - isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf)); - strerror_r(send_errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_send: %s: %s", - addrbuf, strbuf); - dev->result = isc__errno2result(send_errno); - inc_stats(sock->manager->stats, - sock->statsindex[STATID_SENDFAIL]); - return (DOIO_HARD); - } - - if (cc == 0) { - inc_stats(sock->manager->stats, - sock->statsindex[STATID_SENDFAIL]); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "doio_send: send() returned 0"); - } - - /* - * If we write less than we expected, update counters, poke. - */ - dev->n += cc; - if ((size_t)cc != write_count) { - return (DOIO_SOFT); - } - - /* - * Exactly what we wanted to write. We're done with this - * entry. Post its completion event. - */ - dev->result = ISC_R_SUCCESS; - return (DOIO_SUCCESS); -} - -/* - * Kill. - * - * Caller must ensure that the socket is not locked and no external - * references exist. - */ -static void -socketclose(isc__socketthread_t *thread, isc_socket_t *sock, int fd) { - int lockid = FDLOCK_ID(fd); - /* - * No one has this socket open, so the watcher doesn't have to be - * poked, and the socket doesn't have to be locked. 
- */ - LOCK(&thread->fdlock[lockid]); - thread->fds[fd] = NULL; - thread->fdstate[fd] = CLOSE_PENDING; - UNLOCK(&thread->fdlock[lockid]); - select_poke(thread->manager, thread->threadid, fd, SELECT_POKE_CLOSE); - - inc_stats(thread->manager->stats, sock->statsindex[STATID_CLOSE]); - - LOCK(&sock->lock); - if (sock->active == 1) { - dec_stats(thread->manager->stats, - sock->statsindex[STATID_ACTIVE]); - sock->active = 0; - } - UNLOCK(&sock->lock); - - /* - * update manager->maxfd here (XXX: this should be implemented more - * efficiently) - */ -#ifdef USE_SELECT - LOCK(&thread->manager->lock); - if (thread->maxfd == fd) { - int i; - - thread->maxfd = 0; - for (i = fd - 1; i >= 0; i--) { - lockid = FDLOCK_ID(i); - - LOCK(&thread->fdlock[lockid]); - if (thread->fdstate[i] == MANAGED) { - thread->maxfd = i; - UNLOCK(&thread->fdlock[lockid]); - break; - } - UNLOCK(&thread->fdlock[lockid]); - } - if (thread->maxfd < thread->pipe_fds[0]) { - thread->maxfd = thread->pipe_fds[0]; - } - } - - UNLOCK(&thread->manager->lock); -#endif /* USE_SELECT */ -} - -static void -destroy(isc_socket_t **sockp) { - int fd = 0; - isc_socket_t *sock = *sockp; - isc_socketmgr_t *manager = sock->manager; - isc__socketthread_t *thread = NULL; - - socket_log(sock, NULL, CREATION, "destroying"); - - isc_refcount_destroy(&sock->references); - - LOCK(&sock->lock); - INSIST(ISC_LIST_EMPTY(sock->connect_list)); - INSIST(ISC_LIST_EMPTY(sock->accept_list)); - INSIST(ISC_LIST_EMPTY(sock->recv_list)); - INSIST(ISC_LIST_EMPTY(sock->send_list)); - INSIST(sock->fd >= -1 && sock->fd < (int)manager->maxsocks); - - if (sock->fd >= 0) { - fd = sock->fd; - thread = &manager->threads[sock->threadid]; - sock->fd = -1; - sock->threadid = -1; - } - UNLOCK(&sock->lock); - - if (fd > 0) { - socketclose(thread, sock, fd); - } - - LOCK(&manager->lock); - - ISC_LIST_UNLINK(manager->socklist, sock, link); - - if (ISC_LIST_EMPTY(manager->socklist)) { - SIGNAL(&manager->shutdown_ok); - } - - /* can't unlock manager as its 
memory context is still used */ - free_socket(sockp); - - UNLOCK(&manager->lock); -} - -static isc_result_t -allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, - isc_socket_t **socketp) { - isc_socket_t *sock; - - sock = isc_mem_get(manager->mctx, sizeof(*sock)); - - sock->magic = 0; - isc_refcount_init(&sock->references, 0); - - sock->manager = manager; - sock->type = type; - sock->fd = -1; - sock->threadid = -1; - sock->dscp = 0; /* TOS/TCLASS is zero until set. */ - sock->statsindex = NULL; - sock->active = 0; - - ISC_LINK_INIT(sock, link); - - memset(sock->name, 0, sizeof(sock->name)); - sock->tag = NULL; - - /* - * Set up list of readers and writers to be initially empty. - */ - ISC_LIST_INIT(sock->recv_list); - ISC_LIST_INIT(sock->send_list); - ISC_LIST_INIT(sock->accept_list); - ISC_LIST_INIT(sock->connect_list); - - sock->listener = 0; - sock->connected = 0; - sock->connecting = 0; - sock->bound = 0; - sock->pktdscp = 0; - - /* - * Initialize the lock. - */ - isc_mutex_init(&sock->lock); - - sock->magic = SOCKET_MAGIC; - *socketp = sock; - - return (ISC_R_SUCCESS); -} - -/* - * This event requires that the various lists be empty, that the reference - * count be 1, and that the magic number is valid. The other socket bits, - * like the lock, must be initialized as well. The fd associated must be - * marked as closed, by setting it to -1 on close, or this routine will - * also close the socket. 
- */ -static void -free_socket(isc_socket_t **socketp) { - isc_socket_t *sock = *socketp; - *socketp = NULL; - - INSIST(VALID_SOCKET(sock)); - isc_refcount_destroy(&sock->references); - LOCK(&sock->lock); - INSIST(!sock->connecting); - INSIST(ISC_LIST_EMPTY(sock->recv_list)); - INSIST(ISC_LIST_EMPTY(sock->send_list)); - INSIST(ISC_LIST_EMPTY(sock->accept_list)); - INSIST(ISC_LIST_EMPTY(sock->connect_list)); - INSIST(!ISC_LINK_LINKED(sock, link)); - UNLOCK(&sock->lock); - - sock->magic = 0; - - isc_mutex_destroy(&sock->lock); - - isc_mem_put(sock->manager->mctx, sock, sizeof(*sock)); -} - -#if defined(SET_RCVBUF) -static isc_once_t rcvbuf_once = ISC_ONCE_INIT; -static int rcvbuf = ISC_RECV_BUFFER_SIZE; - -static void -set_rcvbuf(void) { - int fd; - int max = rcvbuf, min; - socklen_t len; - - fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (fd == -1) { - switch (errno) { - case EPROTONOSUPPORT: - case EPFNOSUPPORT: - case EAFNOSUPPORT: - /* - * Linux 2.2 (and maybe others) return EINVAL instead of - * EAFNOSUPPORT. 
- */ - case EINVAL: - fd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); - break; - } - } - if (fd == -1) { - return; - } - - len = sizeof(min); - if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *)&min, &len) == 0 && - min < rcvbuf) - { - again: - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *)&rcvbuf, - sizeof(rcvbuf)) == -1) - { - if (errno == ENOBUFS && rcvbuf > min) { - max = rcvbuf - 1; - rcvbuf = (rcvbuf + min) / 2; - goto again; - } else { - rcvbuf = min; - goto cleanup; - } - } else { - min = rcvbuf; - } - if (min != max) { - rcvbuf = max; - goto again; - } - } -cleanup: - close(fd); -} -#endif /* ifdef SO_RCVBUF */ - -#if defined(SET_SNDBUF) -static isc_once_t sndbuf_once = ISC_ONCE_INIT; -static int sndbuf = ISC_SEND_BUFFER_SIZE; - -static void -set_sndbuf(void) { - int fd; - int max = sndbuf, min; - socklen_t len; - - fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (fd == -1) { - switch (errno) { - case EPROTONOSUPPORT: - case EPFNOSUPPORT: - case EAFNOSUPPORT: - /* - * Linux 2.2 (and maybe others) return EINVAL instead of - * EAFNOSUPPORT. 
- */ - case EINVAL: - fd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); - break; - } - } - if (fd == -1) { - return; - } - - len = sizeof(min); - if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *)&min, &len) == 0 && - min < sndbuf) - { - again: - if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *)&sndbuf, - sizeof(sndbuf)) == -1) - { - if (errno == ENOBUFS && sndbuf > min) { - max = sndbuf - 1; - sndbuf = (sndbuf + min) / 2; - goto again; - } else { - sndbuf = min; - goto cleanup; - } - } else { - min = sndbuf; - } - if (min != max) { - sndbuf = max; - goto again; - } - } -cleanup: - close(fd); -} -#endif /* ifdef SO_SNDBUF */ - -static void -use_min_mtu(isc_socket_t *sock) { -#if !defined(IPV6_USE_MIN_MTU) && !defined(IPV6_MTU) - UNUSED(sock); -#endif /* if !defined(IPV6_USE_MIN_MTU) && !defined(IPV6_MTU) */ -#ifdef IPV6_USE_MIN_MTU - /* use minimum MTU */ - if (sock->pf == AF_INET6) { - int on = 1; - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU, - (void *)&on, sizeof(on)); - } -#endif /* ifdef IPV6_USE_MIN_MTU */ -#if defined(IPV6_MTU) - /* - * Use minimum MTU on IPv6 sockets. 
- */ - if (sock->pf == AF_INET6) { - int mtu = 1280; - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_MTU, &mtu, - sizeof(mtu)); - } -#endif /* if defined(IPV6_MTU) */ -} - -static void -set_tcp_maxseg(isc_socket_t *sock, int size) { -#ifdef TCP_MAXSEG - if (sock->type == isc_sockettype_tcp) { - (void)setsockopt(sock->fd, IPPROTO_TCP, TCP_MAXSEG, - (void *)&size, sizeof(size)); - } -#endif /* ifdef TCP_MAXSEG */ -} - -static void -set_ip_disable_pmtud(isc_socket_t *sock) { - /* - * Disable Path MTU Discover on IP packets - */ - if (sock->pf == AF_INET6) { -#if defined(IPV6_DONTFRAG) - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_DONTFRAG, - &(int){ 0 }, sizeof(int)); -#endif -#if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT) - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, - &(int){ IP_PMTUDISC_OMIT }, sizeof(int)); -#endif - } else if (sock->pf == AF_INET) { -#if defined(IP_DONTFRAG) - (void)setsockopt(sock->fd, IPPROTO_IP, IP_DONTFRAG, &(int){ 0 }, - sizeof(int)); -#endif -#if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT) - (void)setsockopt(sock->fd, IPPROTO_IP, IP_MTU_DISCOVER, - &(int){ IP_PMTUDISC_OMIT }, sizeof(int)); -#endif - } -} - -static isc_result_t -opensocket(isc_socketmgr_t *manager, isc_socket_t *sock) { - isc_result_t result; - char strbuf[ISC_STRERRORSIZE]; - const char *err = "socket"; - int tries = 0; -#if defined(USE_CMSG) || defined(SO_NOSIGPIPE) - int on = 1; -#endif /* if defined(USE_CMSG) || defined(SO_NOSIGPIPE) */ -#if defined(SET_RCVBUF) || defined(SET_SNDBUF) - socklen_t optlen; - int size = 0; -#endif - -again: - switch (sock->type) { - case isc_sockettype_udp: - sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP); - break; - case isc_sockettype_tcp: - sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); - break; - case isc_sockettype_unix: - sock->fd = socket(sock->pf, SOCK_STREAM, 0); - break; - case isc_sockettype_raw: - errno = EPFNOSUPPORT; - /* - * PF_ROUTE is a alias for PF_NETLINK on linux. 
- */ -#if defined(PF_ROUTE) - if (sock->fd == -1 && sock->pf == PF_ROUTE) { -#ifdef NETLINK_ROUTE - sock->fd = socket(sock->pf, SOCK_RAW, NETLINK_ROUTE); -#else /* ifdef NETLINK_ROUTE */ - sock->fd = socket(sock->pf, SOCK_RAW, 0); -#endif /* ifdef NETLINK_ROUTE */ - if (sock->fd != -1) { -#ifdef NETLINK_ROUTE - struct sockaddr_nl sa; - int n; - - /* - * Do an implicit bind. - */ - memset(&sa, 0, sizeof(sa)); - sa.nl_family = AF_NETLINK; - sa.nl_groups = RTMGRP_IPV4_IFADDR | - RTMGRP_IPV6_IFADDR; - n = bind(sock->fd, (struct sockaddr *)&sa, - sizeof(sa)); - if (n < 0) { - close(sock->fd); - sock->fd = -1; - } -#endif /* ifdef NETLINK_ROUTE */ - sock->bound = 1; - } - } -#endif /* if defined(PF_ROUTE) */ - break; - } - if (sock->fd == -1 && errno == EINTR && tries++ < 42) { - goto again; - } - -#ifdef F_DUPFD - /* - * Leave a space for stdio and TCP to work in. - */ - if (manager->reserved != 0 && sock->type == isc_sockettype_udp && - sock->fd >= 0 && sock->fd < manager->reserved) - { - int newfd, tmp; - newfd = fcntl(sock->fd, F_DUPFD, manager->reserved); - tmp = errno; - (void)close(sock->fd); - errno = tmp; - sock->fd = newfd; - err = "isc_socket_create: fcntl/reserved"; - } else if (sock->fd >= 0 && sock->fd < 20) { - int newfd, tmp; - newfd = fcntl(sock->fd, F_DUPFD, 20); - tmp = errno; - (void)close(sock->fd); - errno = tmp; - sock->fd = newfd; - err = "isc_socket_create: fcntl"; - } -#endif /* ifdef F_DUPFD */ - - if (sock->fd >= (int)manager->maxsocks) { - (void)close(sock->fd); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "socket: file descriptor exceeds limit (%d/%u)", - sock->fd, manager->maxsocks); - inc_stats(manager->stats, sock->statsindex[STATID_OPENFAIL]); - return (ISC_R_NORESOURCES); - } - - if (sock->fd < 0) { - switch (errno) { - case EMFILE: - case ENFILE: - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "%s: 
%s", err, strbuf); - /* fallthrough */ - case ENOBUFS: - inc_stats(manager->stats, - sock->statsindex[STATID_OPENFAIL]); - return (ISC_R_NORESOURCES); - - case EPROTONOSUPPORT: - case EPFNOSUPPORT: - case EAFNOSUPPORT: - /* - * Linux 2.2 (and maybe others) return EINVAL instead of - * EAFNOSUPPORT. - */ - case EINVAL: - inc_stats(manager->stats, - sock->statsindex[STATID_OPENFAIL]); - return (ISC_R_FAMILYNOSUPPORT); - - default: - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "%s() failed: %s", - err, strbuf); - inc_stats(manager->stats, - sock->statsindex[STATID_OPENFAIL]); - return (ISC_R_UNEXPECTED); - } - } - - result = make_nonblock(sock->fd); - if (result != ISC_R_SUCCESS) { - (void)close(sock->fd); - inc_stats(manager->stats, sock->statsindex[STATID_OPENFAIL]); - return (result); - } - -#ifdef SO_NOSIGPIPE - if (setsockopt(sock->fd, SOL_SOCKET, SO_NOSIGPIPE, (void *)&on, - sizeof(on)) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, SO_NOSIGPIPE) failed: %s", - sock->fd, strbuf); - /* Press on... */ - } -#endif /* ifdef SO_NOSIGPIPE */ - - /* - * Use minimum mtu if possible. - */ - if (sock->type == isc_sockettype_tcp && sock->pf == AF_INET6) { - use_min_mtu(sock); - set_tcp_maxseg(sock, 1280 - 20 - 40); /* 1280 - TCP - IPV6 */ - } - -#if defined(USE_CMSG) || defined(SET_RCVBUF) || defined(SET_SNDBUF) - if (sock->type == isc_sockettype_udp) { -#if defined(USE_CMSG) -#if defined(SO_TIMESTAMP) - if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP, (void *)&on, - sizeof(on)) < 0 && - errno != ENOPROTOOPT) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, SO_TIMESTAMP) failed: " - "%s", - sock->fd, strbuf); - /* Press on... 
*/ - } -#endif /* SO_TIMESTAMP */ - -#ifdef IPV6_RECVPKTINFO - /* RFC 3542 */ - if ((sock->pf == AF_INET6) && - (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, - (void *)&on, sizeof(on)) < 0)) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_RECVPKTINFO) " - "failed: %s", - sock->fd, strbuf); - } -#else /* ifdef IPV6_RECVPKTINFO */ - /* RFC 2292 */ - if ((sock->pf == AF_INET6) && - (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO, - (void *)&on, sizeof(on)) < 0)) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_PKTINFO) failed: " - "%s", - sock->fd, strbuf); - } -#endif /* IPV6_RECVPKTINFO */ -#endif /* defined(USE_CMSG) */ - -#if defined(SET_RCVBUF) - optlen = sizeof(size); - if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, (void *)&size, - &optlen) == 0 && - size < rcvbuf) - { - RUNTIME_CHECK(isc_once_do(&rcvbuf_once, set_rcvbuf) == - ISC_R_SUCCESS); - if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, - (void *)&rcvbuf, sizeof(rcvbuf)) == -1) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, SO_RCVBUF, " - "%d) failed: %s", - sock->fd, rcvbuf, strbuf); - } - } -#endif /* if defined(SET_RCVBUF) */ - -#if defined(SET_SNDBUF) - optlen = sizeof(size); - if (getsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, (void *)&size, - &optlen) == 0 && - size < sndbuf) - { - RUNTIME_CHECK(isc_once_do(&sndbuf_once, set_sndbuf) == - ISC_R_SUCCESS); - if (setsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, - (void *)&sndbuf, sizeof(sndbuf)) == -1) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, SO_SNDBUF, " - "%d) failed: %s", - sock->fd, sndbuf, strbuf); - } - } -#endif /* if defined(SO_SNDBUF) */ - } -#ifdef IPV6_RECVTCLASS - if ((sock->pf == AF_INET6) && - (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVTCLASS, (void *)&on, - sizeof(on)) < 0)) - { - 
strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_RECVTCLASS) " - "failed: %s", - sock->fd, strbuf); - } -#endif /* ifdef IPV6_RECVTCLASS */ -#ifdef IP_RECVTOS - if ((sock->pf == AF_INET) && - (setsockopt(sock->fd, IPPROTO_IP, IP_RECVTOS, (void *)&on, - sizeof(on)) < 0)) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IP_RECVTOS) " - "failed: %s", - sock->fd, strbuf); - } -#endif /* ifdef IP_RECVTOS */ -#endif /* defined(USE_CMSG) || defined(SET_RCVBUF) || defined(SET_SNDBUF) */ - - set_ip_disable_pmtud(sock); - - inc_stats(manager->stats, sock->statsindex[STATID_OPEN]); - if (sock->active == 0) { - inc_stats(manager->stats, sock->statsindex[STATID_ACTIVE]); - sock->active = 1; - } - - return (ISC_R_SUCCESS); -} - -/* - * Create a 'type' socket, managed by 'manager'. Events will be posted to - * 'task' and when dispatched 'action' will be called with 'arg' as the arg - * value. The new socket is returned in 'socketp'. - */ -static isc_result_t -socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, - isc_socket_t **socketp) { - isc_socket_t *sock = NULL; - isc__socketthread_t *thread; - isc_result_t result; - int lockid; - - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(socketp != NULL && *socketp == NULL); - - result = allocate_socket(manager, type, &sock); - if (result != ISC_R_SUCCESS) { - return (result); - } - - switch (sock->type) { - case isc_sockettype_udp: - sock->statsindex = (pf == AF_INET) ? udp4statsindex - : udp6statsindex; -#define DCSPPKT(pf) ((pf == AF_INET) ? ISC_NET_DSCPPKTV4 : ISC_NET_DSCPPKTV6) - sock->pktdscp = (isc_net_probedscp() & DCSPPKT(pf)) != 0; - break; - case isc_sockettype_tcp: - sock->statsindex = (pf == AF_INET) ? 
tcp4statsindex - : tcp6statsindex; - break; - case isc_sockettype_unix: - sock->statsindex = unixstatsindex; - break; - case isc_sockettype_raw: - sock->statsindex = rawstatsindex; - break; - default: - INSIST(0); - ISC_UNREACHABLE(); - } - - sock->pf = pf; - - result = opensocket(manager, sock); - if (result != ISC_R_SUCCESS) { - free_socket(&sock); - return (result); - } - - if (sock->fd == -1) { - abort(); - } - sock->threadid = gen_threadid(sock); - isc_refcount_increment0(&sock->references); - thread = &manager->threads[sock->threadid]; - *socketp = sock; - - /* - * Note we don't have to lock the socket like we normally would because - * there are no external references to it yet. - */ - - lockid = FDLOCK_ID(sock->fd); - LOCK(&thread->fdlock[lockid]); - thread->fds[sock->fd] = sock; - thread->fdstate[sock->fd] = MANAGED; -#if defined(USE_EPOLL) - thread->epoll_events[sock->fd] = 0; -#endif /* if defined(USE_EPOLL) */ -#ifdef USE_DEVPOLL - INSIST(thread->fdpollinfo[sock->fd].want_read == 0 && - thread->fdpollinfo[sock->fd].want_write == 0); -#endif /* ifdef USE_DEVPOLL */ - UNLOCK(&thread->fdlock[lockid]); - - LOCK(&manager->lock); - ISC_LIST_APPEND(manager->socklist, sock, link); -#ifdef USE_SELECT - if (thread->maxfd < sock->fd) { - thread->maxfd = sock->fd; - } -#endif /* ifdef USE_SELECT */ - UNLOCK(&manager->lock); - - socket_log(sock, NULL, CREATION, "created"); - - return (ISC_R_SUCCESS); -} - -/*% - * Create a new 'type' socket managed by 'manager'. Events - * will be posted to 'task' and when dispatched 'action' will be - * called with 'arg' as the arg value. The new socket is returned - * in 'socketp'. 
- */ -isc_result_t -isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, - isc_socket_t **socketp) { - return (socket_create(manager, pf, type, socketp)); -} - -isc_result_t -isc_socket_open(isc_socket_t *sock) { - isc_result_t result; - isc__socketthread_t *thread; - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - - REQUIRE(isc_refcount_current(&sock->references) >= 1); - REQUIRE(sock->fd == -1); - REQUIRE(sock->threadid == -1); - - result = opensocket(sock->manager, sock); - - UNLOCK(&sock->lock); - - if (result != ISC_R_SUCCESS) { - sock->fd = -1; - } else { - sock->threadid = gen_threadid(sock); - thread = &sock->manager->threads[sock->threadid]; - int lockid = FDLOCK_ID(sock->fd); - - LOCK(&thread->fdlock[lockid]); - thread->fds[sock->fd] = sock; - thread->fdstate[sock->fd] = MANAGED; -#if defined(USE_EPOLL) - thread->epoll_events[sock->fd] = 0; -#endif /* if defined(USE_EPOLL) */ -#ifdef USE_DEVPOLL - INSIST(thread->fdpollinfo[sock->fd].want_read == 0 && - thread->fdpollinfo[sock->fd].want_write == 0); -#endif /* ifdef USE_DEVPOLL */ - UNLOCK(&thread->fdlock[lockid]); - -#ifdef USE_SELECT - LOCK(&sock->manager->lock); - if (thread->maxfd < sock->fd) { - thread->maxfd = sock->fd; - } - UNLOCK(&sock->manager->lock); -#endif /* ifdef USE_SELECT */ - } - - return (result); -} - -/* - * Attach to a socket. Caller must explicitly detach when it is done. - */ -void -isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) { - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(socketp != NULL && *socketp == NULL); - - int old_refs = isc_refcount_increment(&sock->references); - REQUIRE(old_refs > 0); - - *socketp = sock; -} - -/* - * Dereference a socket. If this is the last reference to it, clean things - * up by destroying the socket. 
- */ -void -isc_socket_detach(isc_socket_t **socketp) { - isc_socket_t *sock; - - REQUIRE(socketp != NULL); - sock = *socketp; - REQUIRE(VALID_SOCKET(sock)); - if (isc_refcount_decrement(&sock->references) == 1) { - destroy(&sock); - } - - *socketp = NULL; -} - -isc_result_t -isc_socket_close(isc_socket_t *sock) { - int fd; - isc_socketmgr_t *manager; - isc__socketthread_t *thread; - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - - REQUIRE(sock->fd >= 0 && sock->fd < (int)sock->manager->maxsocks); - - INSIST(!sock->connecting); - INSIST(ISC_LIST_EMPTY(sock->recv_list)); - INSIST(ISC_LIST_EMPTY(sock->send_list)); - INSIST(ISC_LIST_EMPTY(sock->accept_list)); - INSIST(ISC_LIST_EMPTY(sock->connect_list)); - - manager = sock->manager; - thread = &manager->threads[sock->threadid]; - fd = sock->fd; - sock->fd = -1; - sock->threadid = -1; - - memset(sock->name, 0, sizeof(sock->name)); - sock->tag = NULL; - sock->listener = 0; - sock->connected = 0; - sock->connecting = 0; - sock->bound = 0; - isc_sockaddr_any(&sock->peer_address); - - UNLOCK(&sock->lock); - - socketclose(thread, sock, fd); - - return (ISC_R_SUCCESS); -} - -/* - * Dequeue an item off the given socket's read queue, set the result code - * in the done event to the one provided, and send it to the task it was - * destined for. - * - * If the event to be sent is on a list, remove it before sending. If - * asked to, send and detach from the socket as well. - * - * Caller must have the socket locked if the event is attached to the socket. 
- */ -static void -send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) { - isc_task_t *task; - - task = (*dev)->ev_sender; - - (*dev)->ev_sender = sock; - - if (ISC_LINK_LINKED(*dev, ev_link)) { - ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link); - } - - if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) != 0) { - isc_task_sendtoanddetach(&task, (isc_event_t **)dev, - sock->threadid); - } else { - isc_task_sendto(task, (isc_event_t **)dev, sock->threadid); - } -} - -/* - * See comments for send_recvdone_event() above. - * - * Caller must have the socket locked if the event is attached to the socket. - */ -static void -send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) { - isc_task_t *task; - - INSIST(dev != NULL && *dev != NULL); - - task = (*dev)->ev_sender; - (*dev)->ev_sender = sock; - - if (ISC_LINK_LINKED(*dev, ev_link)) { - ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link); - } - - if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) != 0) { - isc_task_sendtoanddetach(&task, (isc_event_t **)dev, - sock->threadid); - } else { - isc_task_sendto(task, (isc_event_t **)dev, sock->threadid); - } -} - -/* - * See comments for send_recvdone_event() above. - * - * Caller must have the socket locked if the event is attached to the socket. - */ -static void -send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **dev) { - isc_task_t *task; - - INSIST(dev != NULL && *dev != NULL); - - task = (*dev)->ev_sender; - (*dev)->ev_sender = sock; - - if (ISC_LINK_LINKED(*dev, ev_link)) { - ISC_LIST_DEQUEUE(sock->connect_list, *dev, ev_link); - } - - isc_task_sendtoanddetach(&task, (isc_event_t **)dev, sock->threadid); -} - -/* - * Call accept() on a socket, to get the new file descriptor. The listen - * socket is used as a prototype to create a new isc_socket_t. The new - * socket has one outstanding reference. The task receiving the event - * will be detached from just after the event is delivered. 
- * - * On entry to this function, the event delivered is the internal - * readable event, and the first item on the accept_list should be - * the done event we want to send. If the list is empty, this is a no-op, - * so just unlock and return. - */ -static void -internal_accept(isc_socket_t *sock) { - isc_socketmgr_t *manager; - isc__socketthread_t *thread, *nthread; - isc_socket_newconnev_t *dev; - isc_task_t *task; - socklen_t addrlen; - int fd; - isc_result_t result = ISC_R_SUCCESS; - char strbuf[ISC_STRERRORSIZE]; - const char *err = "accept"; - - INSIST(VALID_SOCKET(sock)); - REQUIRE(sock->fd >= 0); - - socket_log(sock, NULL, TRACE, "internal_accept called, locked socket"); - - manager = sock->manager; - INSIST(VALID_MANAGER(manager)); - thread = &manager->threads[sock->threadid]; - - INSIST(sock->listener); - - /* - * Get the first item off the accept list. - * If it is empty, unlock the socket and return. - */ - dev = ISC_LIST_HEAD(sock->accept_list); - if (dev == NULL) { - unwatch_fd(thread, sock->fd, SELECT_POKE_ACCEPT); - UNLOCK(&sock->lock); - return; - } - - /* - * Try to accept the new connection. If the accept fails with - * EAGAIN or EINTR, simply poke the watcher to watch this socket - * again. Also ignore ECONNRESET, which has been reported to - * be spuriously returned on Linux 2.2.19 although it is not - * a documented error for accept(). ECONNABORTED has been - * reported for Solaris 8. The rest are thrown in not because - * we have seen them but because they are ignored by other - * daemons such as BIND 8 and Apache. - */ - - addrlen = sizeof(NEWCONNSOCK(dev)->peer_address.type); - memset(&NEWCONNSOCK(dev)->peer_address.type, 0, addrlen); - fd = accept(sock->fd, &NEWCONNSOCK(dev)->peer_address.type.sa, - (void *)&addrlen); - -#ifdef F_DUPFD - /* - * Leave a space for stdio to work in. 
- */ - if (fd >= 0 && fd < 20) { - int newfd, tmp; - newfd = fcntl(fd, F_DUPFD, 20); - tmp = errno; - (void)close(fd); - errno = tmp; - fd = newfd; - err = "accept/fcntl"; - } -#endif /* ifdef F_DUPFD */ - - if (fd < 0) { - if (SOFT_ERROR(errno)) { - goto soft_error; - } - switch (errno) { - case ENFILE: - case EMFILE: - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "%s: too many open file descriptors", - err); - goto soft_error; - - case ENOBUFS: - case ENOMEM: - case ECONNRESET: - case ECONNABORTED: - case EHOSTUNREACH: - case EHOSTDOWN: - case ENETUNREACH: - case ENETDOWN: - case ECONNREFUSED: -#ifdef EPROTO - case EPROTO: -#endif /* ifdef EPROTO */ -#ifdef ENONET - case ENONET: -#endif /* ifdef ENONET */ - goto soft_error; - default: - break; - } - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_accept: %s() failed: %s", err, - strbuf); - fd = -1; - result = ISC_R_UNEXPECTED; - } else { - if (addrlen == 0U) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_accept(): " - "accept() failed to return " - "remote address"); - - (void)close(fd); - goto soft_error; - } else if (NEWCONNSOCK(dev)->peer_address.type.sa.sa_family != - sock->pf) { - UNEXPECTED_ERROR( - __FILE__, __LINE__, - "internal_accept(): " - "accept() returned peer address " - "family %u (expected %u)", - NEWCONNSOCK(dev)->peer_address.type.sa.sa_family, - sock->pf); - (void)close(fd); - goto soft_error; - } else if (fd >= (int)manager->maxsocks) { - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "accept: file descriptor exceeds limit " - "(%d/%u)", - fd, manager->maxsocks); - (void)close(fd); - goto soft_error; - } - } - - if (fd != -1) { - NEWCONNSOCK(dev)->peer_address.length = addrlen; - NEWCONNSOCK(dev)->pf = sock->pf; - } - - /* - * Pull off the done event. 
- */ - ISC_LIST_UNLINK(sock->accept_list, dev, ev_link); - - /* - * Poke watcher if there are more pending accepts. - */ - if (ISC_LIST_EMPTY(sock->accept_list)) { - unwatch_fd(thread, sock->fd, SELECT_POKE_ACCEPT); - } - - if (fd != -1) { - result = make_nonblock(fd); - if (result != ISC_R_SUCCESS) { - (void)close(fd); - fd = -1; - } - } - - /* - * We need to unlock sock->lock now to be able to lock manager->lock - * without risking a deadlock with xmlstats. - */ - UNLOCK(&sock->lock); - - /* - * -1 means the new socket didn't happen. - */ - if (fd != -1) { - int lockid = FDLOCK_ID(fd); - - NEWCONNSOCK(dev)->fd = fd; - NEWCONNSOCK(dev)->threadid = gen_threadid(NEWCONNSOCK(dev)); - NEWCONNSOCK(dev)->bound = 1; - NEWCONNSOCK(dev)->connected = 1; - nthread = &manager->threads[NEWCONNSOCK(dev)->threadid]; - - /* - * We already hold a lock on one fdlock in accepting thread, - * we need to make sure that we don't double lock. - */ - bool same_bucket = (sock->threadid == - NEWCONNSOCK(dev)->threadid) && - (FDLOCK_ID(sock->fd) == lockid); - - /* - * Use minimum mtu if possible. - */ - use_min_mtu(NEWCONNSOCK(dev)); - set_tcp_maxseg(NEWCONNSOCK(dev), 1280 - 20 - 40); - - /* - * Ensure DSCP settings are inherited across accept. 
- */ - setdscp(NEWCONNSOCK(dev), sock->dscp); - - /* - * Save away the remote address - */ - dev->address = NEWCONNSOCK(dev)->peer_address; - - if (NEWCONNSOCK(dev)->active == 0) { - inc_stats(manager->stats, - NEWCONNSOCK(dev)->statsindex[STATID_ACTIVE]); - NEWCONNSOCK(dev)->active = 1; - } - - if (!same_bucket) { - LOCK(&nthread->fdlock[lockid]); - } - nthread->fds[fd] = NEWCONNSOCK(dev); - nthread->fdstate[fd] = MANAGED; -#if defined(USE_EPOLL) - nthread->epoll_events[fd] = 0; -#endif /* if defined(USE_EPOLL) */ - if (!same_bucket) { - UNLOCK(&nthread->fdlock[lockid]); - } - - LOCK(&manager->lock); - -#ifdef USE_SELECT - if (nthread->maxfd < fd) { - nthread->maxfd = fd; - } -#endif /* ifdef USE_SELECT */ - - socket_log(sock, &NEWCONNSOCK(dev)->peer_address, CREATION, - "accepted connection, new socket %p", - dev->newsocket); - - ISC_LIST_APPEND(manager->socklist, NEWCONNSOCK(dev), link); - - UNLOCK(&manager->lock); - - inc_stats(manager->stats, sock->statsindex[STATID_ACCEPT]); - } else { - inc_stats(manager->stats, sock->statsindex[STATID_ACCEPTFAIL]); - isc_refcount_decrementz(&NEWCONNSOCK(dev)->references); - free_socket((isc_socket_t **)&dev->newsocket); - } - - /* - * Fill in the done event details and send it off. - */ - dev->result = result; - task = dev->ev_sender; - dev->ev_sender = sock; - - isc_task_sendtoanddetach(&task, ISC_EVENT_PTR(&dev), sock->threadid); - return; - -soft_error: - watch_fd(thread, sock->fd, SELECT_POKE_ACCEPT); - UNLOCK(&sock->lock); - - inc_stats(manager->stats, sock->statsindex[STATID_ACCEPTFAIL]); - return; -} - -static void -internal_recv(isc_socket_t *sock) { - isc_socketevent_t *dev; - - INSIST(VALID_SOCKET(sock)); - REQUIRE(sock->fd >= 0); - - dev = ISC_LIST_HEAD(sock->recv_list); - if (dev == NULL) { - goto finish; - } - - socket_log(sock, NULL, IOEVENT, "internal_recv: event %p -> task %p", - dev, dev->ev_sender); - - /* - * Try to do as much I/O as possible on this socket. There are no - * limits here, currently. 
- */ - while (dev != NULL) { - switch (doio_recv(sock, dev)) { - case DOIO_SOFT: - goto finish; - - case DOIO_EOF: - /* - * read of 0 means the remote end was closed. - * Run through the event queue and dispatch all - * the events with an EOF result code. - */ - do { - dev->result = ISC_R_EOF; - send_recvdone_event(sock, &dev); - dev = ISC_LIST_HEAD(sock->recv_list); - } while (dev != NULL); - goto finish; - - case DOIO_SUCCESS: - case DOIO_HARD: - send_recvdone_event(sock, &dev); - break; - } - - dev = ISC_LIST_HEAD(sock->recv_list); - } - -finish: - if (ISC_LIST_EMPTY(sock->recv_list)) { - unwatch_fd(&sock->manager->threads[sock->threadid], sock->fd, - SELECT_POKE_READ); - } -} - -static void -internal_send(isc_socket_t *sock) { - isc_socketevent_t *dev; - - INSIST(VALID_SOCKET(sock)); - REQUIRE(sock->fd >= 0); - - dev = ISC_LIST_HEAD(sock->send_list); - if (dev == NULL) { - goto finish; - } - socket_log(sock, NULL, EVENT, "internal_send: event %p -> task %p", dev, - dev->ev_sender); - - /* - * Try to do as much I/O as possible on this socket. There are no - * limits here, currently. - */ - while (dev != NULL) { - switch (doio_send(sock, dev)) { - case DOIO_SOFT: - goto finish; - - case DOIO_HARD: - case DOIO_SUCCESS: - send_senddone_event(sock, &dev); - break; - } - - dev = ISC_LIST_HEAD(sock->send_list); - } - -finish: - if (ISC_LIST_EMPTY(sock->send_list)) { - unwatch_fd(&sock->manager->threads[sock->threadid], sock->fd, - SELECT_POKE_WRITE); - } -} - -/* - * Process read/writes on each fd here. Avoid locking - * and unlocking twice if both reads and writes are possible. - */ -static void -process_fd(isc__socketthread_t *thread, int fd, bool readable, bool writeable) { - isc_socket_t *sock; - int lockid = FDLOCK_ID(fd); - - /* - * If the socket is going to be closed, don't do more I/O. 
- */ - LOCK(&thread->fdlock[lockid]); - if (thread->fdstate[fd] == CLOSE_PENDING) { - UNLOCK(&thread->fdlock[lockid]); - - (void)unwatch_fd(thread, fd, SELECT_POKE_READ); - (void)unwatch_fd(thread, fd, SELECT_POKE_WRITE); - return; - } - - sock = thread->fds[fd]; - if (sock == NULL) { - UNLOCK(&thread->fdlock[lockid]); - return; - } - - LOCK(&sock->lock); - - if (sock->fd < 0) { - /* - * Sock is being closed - the final external reference - * is gone but it was not yet removed from event loop - * and fdstate[]/fds[] as destroy() is waiting on - * thread->fdlock[lockid] or sock->lock that we're holding. - * Just release the locks and bail. - */ - UNLOCK(&sock->lock); - UNLOCK(&thread->fdlock[lockid]); - return; - } - - REQUIRE(readable || writeable); - if (writeable) { - if (sock->connecting) { - internal_connect(sock); - } else { - internal_send(sock); - } - } - - if (readable) { - if (sock->listener) { - internal_accept(sock); /* unlocks sock */ - } else { - internal_recv(sock); - UNLOCK(&sock->lock); - } - } else { - UNLOCK(&sock->lock); - } - - UNLOCK(&thread->fdlock[lockid]); - - /* - * Socket destruction might be pending, it will resume - * after releasing fdlock and sock->lock. - */ -} - -/* - * process_fds is different for different event loops - * it takes the events from event loops and for each FD - * launches process_fd - */ -#ifdef USE_KQUEUE -static bool -process_fds(isc__socketthread_t *thread, struct kevent *events, int nevents) { - int i; - bool readable, writable; - bool done = false; - bool have_ctlevent = false; - if (nevents == thread->nevents) { - /* - * This is not an error, but something unexpected. If this - * happens, it may indicate the need for increasing - * ISC_SOCKET_MAXEVENTS. 
- */ - thread_log(thread, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, - "maximum number of FD events (%d) received", - nevents); - } - - for (i = 0; i < nevents; i++) { - REQUIRE(events[i].ident < thread->manager->maxsocks); - if (events[i].ident == (uintptr_t)thread->pipe_fds[0]) { - have_ctlevent = true; - continue; - } - readable = (events[i].filter == EVFILT_READ); - writable = (events[i].filter == EVFILT_WRITE); - process_fd(thread, events[i].ident, readable, writable); - } - - if (have_ctlevent) { - done = process_ctlfd(thread); - } - - return (done); -} -#elif defined(USE_EPOLL) -static bool -process_fds(isc__socketthread_t *thread, struct epoll_event *events, - int nevents) { - int i; - bool done = false; - bool have_ctlevent = false; - - if (nevents == thread->nevents) { - thread_log(thread, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, - "maximum number of FD events (%d) received", - nevents); - } - - for (i = 0; i < nevents; i++) { - REQUIRE(events[i].data.fd < (int)thread->manager->maxsocks); - if (events[i].data.fd == thread->pipe_fds[0]) { - have_ctlevent = true; - continue; - } - if ((events[i].events & EPOLLERR) != 0 || - (events[i].events & EPOLLHUP) != 0) { - /* - * epoll does not set IN/OUT bits on an erroneous - * condition, so we need to try both anyway. This is a - * bit inefficient, but should be okay for such rare - * events. Note also that the read or write attempt - * won't block because we use non-blocking sockets. 
- */ - int fd = events[i].data.fd; - events[i].events |= thread->epoll_events[fd]; - } - process_fd(thread, events[i].data.fd, - (events[i].events & EPOLLIN) != 0, - (events[i].events & EPOLLOUT) != 0); - } - - if (have_ctlevent) { - done = process_ctlfd(thread); - } - - return (done); -} -#elif defined(USE_DEVPOLL) -static bool -process_fds(isc__socketthread_t *thread, struct pollfd *events, int nevents) { - int i; - bool done = false; - bool have_ctlevent = false; - - if (nevents == thread->nevents) { - thread_log(thread, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, - "maximum number of FD events (%d) received", - nevents); - } - - for (i = 0; i < nevents; i++) { - REQUIRE(events[i].fd < (int)thread->manager->maxsocks); - if (events[i].fd == thread->pipe_fds[0]) { - have_ctlevent = true; - continue; - } - process_fd(thread, events[i].fd, - (events[i].events & POLLIN) != 0, - (events[i].events & POLLOUT) != 0); - } - - if (have_ctlevent) { - done = process_ctlfd(thread); - } - - return (done); -} -#elif defined(USE_SELECT) -static void -process_fds(isc__socketthread_t *thread, int maxfd, fd_set *readfds, - fd_set *writefds) { - int i; - - REQUIRE(maxfd <= (int)thread->manager->maxsocks); - - for (i = 0; i < maxfd; i++) { - if (i == thread->pipe_fds[0] || i == thread->pipe_fds[1]) { - continue; - } - process_fd(thread, i, FD_ISSET(i, readfds), - FD_ISSET(i, writefds)); - } -} -#endif /* ifdef USE_KQUEUE */ - -static bool -process_ctlfd(isc__socketthread_t *thread) { - int msg, fd; - - for (;;) { - select_readmsg(thread, &fd, &msg); - - thread_log(thread, IOEVENT, - "watcher got message %d for socket %d", msg, fd); - - /* - * Nothing to read? - */ - if (msg == SELECT_POKE_NOTHING) { - break; - } - - /* - * Handle shutdown message. We really should - * jump out of this loop right away, but - * it doesn't matter if we have to do a little - * more work first. 
- */ - if (msg == SELECT_POKE_SHUTDOWN) { - return (true); - } - - /* - * This is a wakeup on a socket. Look - * at the event queue for both read and write, - * and decide if we need to watch on it now - * or not. - */ - wakeup_socket(thread, fd, msg); - } - - return (false); -} - -/* - * This is the thread that will loop forever, always in a select or poll - * call. - * - * When select returns something to do, do whatever's necessary and post - * an event to the task that was requesting the action. - */ -static isc_threadresult_t -netthread(void *uap) { - isc__socketthread_t *thread = uap; - isc_socketmgr_t *manager = thread->manager; - (void)manager; - bool done; - int cc; -#ifdef USE_KQUEUE - const char *fnname = "kevent()"; -#elif defined(USE_EPOLL) - const char *fnname = "epoll_wait()"; -#elif defined(USE_DEVPOLL) - isc_result_t result; - const char *fnname = "ioctl(DP_POLL)"; - struct dvpoll dvp; - int pass; -#if defined(ISC_SOCKET_USE_POLLWATCH) - pollstate_t pollstate = poll_idle; -#endif /* if defined(ISC_SOCKET_USE_POLLWATCH) */ -#elif defined(USE_SELECT) - const char *fnname = "select()"; - int maxfd; - int ctlfd; -#endif /* ifdef USE_KQUEUE */ - char strbuf[ISC_STRERRORSIZE]; - -#if defined(USE_SELECT) - /* - * Get the control fd here. This will never change. - */ - ctlfd = thread->pipe_fds[0]; -#endif /* if defined(USE_SELECT) */ - done = false; - while (!done) { - do { -#ifdef USE_KQUEUE - cc = kevent(thread->kqueue_fd, NULL, 0, thread->events, - thread->nevents, NULL); -#elif defined(USE_EPOLL) - cc = epoll_wait(thread->epoll_fd, thread->events, - thread->nevents, -1); -#elif defined(USE_DEVPOLL) - /* - * Re-probe every thousand calls. 
- */ - if (thread->calls++ > 1000U) { - result = isc_resource_getcurlimit( - isc_resource_openfiles, - &thread->open_max); - if (result != ISC_R_SUCCESS) { - thread->open_max = 64; - } - thread->calls = 0; - } - for (pass = 0; pass < 2; pass++) { - dvp.dp_fds = thread->events; - dvp.dp_nfds = thread->nevents; - if (dvp.dp_nfds >= thread->open_max) { - dvp.dp_nfds = thread->open_max - 1; - } -#ifndef ISC_SOCKET_USE_POLLWATCH - dvp.dp_timeout = -1; -#else /* ifndef ISC_SOCKET_USE_POLLWATCH */ - if (pollstate == poll_idle) { - dvp.dp_timeout = -1; - } else { - dvp.dp_timeout = - ISC_SOCKET_POLLWATCH_TIMEOUT; - } -#endif /* ISC_SOCKET_USE_POLLWATCH */ - cc = ioctl(thread->devpoll_fd, DP_POLL, &dvp); - if (cc == -1 && errno == EINVAL) { - /* - * {OPEN_MAX} may have dropped. Look - * up the current value and try again. - */ - result = isc_resource_getcurlimit( - isc_resource_openfiles, - &thread->open_max); - if (result != ISC_R_SUCCESS) { - thread->open_max = 64; - } - } else { - break; - } - } -#elif defined(USE_SELECT) - /* - * We will have only one thread anyway, we can lock - * manager lock and don't care - */ - LOCK(&manager->lock); - memmove(thread->read_fds_copy, thread->read_fds, - thread->fd_bufsize); - memmove(thread->write_fds_copy, thread->write_fds, - thread->fd_bufsize); - maxfd = thread->maxfd + 1; - UNLOCK(&manager->lock); - - cc = select(maxfd, thread->read_fds_copy, - thread->write_fds_copy, NULL, NULL); -#endif /* USE_KQUEUE */ - - if (cc < 0 && !SOFT_ERROR(errno)) { - strerror_r(errno, strbuf, sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, "%s failed: %s", - fnname, strbuf); - } - -#if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH) - if (cc == 0) { - if (pollstate == poll_active) { - pollstate = poll_checking; - } else if (pollstate == poll_checking) { - pollstate = poll_idle; - } - } else if (cc > 0) { - if (pollstate == poll_checking) { - /* - * XXX: We'd like to use a more - * verbose log level as it's actually an - * unexpected 
event, but the kernel bug - * reportedly happens pretty frequently - * (and it can also be a false positive) - * so it would be just too noisy. - */ - thread_log(thread, - ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, - ISC_LOG_DEBUG(1), - "unexpected POLL timeout"); - } - pollstate = poll_active; - } -#endif /* if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH) */ - } while (cc < 0); - -#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) - done = process_fds(thread, thread->events, cc); -#elif defined(USE_SELECT) - process_fds(thread, maxfd, thread->read_fds_copy, - thread->write_fds_copy); - - /* - * Process reads on internal, control fd. - */ - if (FD_ISSET(ctlfd, thread->read_fds_copy)) { - done = process_ctlfd(thread); - } -#endif /* if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) \ - * */ - } - - thread_log(thread, TRACE, "watcher exiting"); - return ((isc_threadresult_t)0); -} - -void -isc_socketmgr_setreserved(isc_socketmgr_t *manager, uint32_t reserved) { - REQUIRE(VALID_MANAGER(manager)); - - manager->reserved = reserved; -} - -void -isc_socketmgr_maxudp(isc_socketmgr_t *manager, unsigned int maxudp) { - REQUIRE(VALID_MANAGER(manager)); - - manager->maxudp = maxudp; -} - -/* - * Setup socket thread, thread->manager and thread->threadid must be filled. 
- */ - -static isc_result_t -setup_thread(isc__socketthread_t *thread) { - isc_result_t result = ISC_R_SUCCESS; - int i; - char strbuf[ISC_STRERRORSIZE]; - - REQUIRE(thread != NULL); - REQUIRE(VALID_MANAGER(thread->manager)); - REQUIRE(thread->threadid >= 0 && - thread->threadid < thread->manager->nthreads); - - thread->fds = - isc_mem_get(thread->manager->mctx, - thread->manager->maxsocks * sizeof(isc_socket_t *)); - - memset(thread->fds, 0, - thread->manager->maxsocks * sizeof(isc_socket_t *)); - - thread->fdstate = isc_mem_get(thread->manager->mctx, - thread->manager->maxsocks * sizeof(int)); - - memset(thread->fdstate, 0, thread->manager->maxsocks * sizeof(int)); - - thread->fdlock = isc_mem_get(thread->manager->mctx, - FDLOCK_COUNT * sizeof(isc_mutex_t)); - - for (i = 0; i < FDLOCK_COUNT; i++) { - isc_mutex_init(&thread->fdlock[i]); - } - - if (pipe(thread->pipe_fds) != 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "pipe() failed: %s", - strbuf); - return (ISC_R_UNEXPECTED); - } - RUNTIME_CHECK(make_nonblock(thread->pipe_fds[0]) == ISC_R_SUCCESS); - -#ifdef USE_KQUEUE - thread->nevents = ISC_SOCKET_MAXEVENTS; - thread->events = isc_mem_get(thread->manager->mctx, - sizeof(struct kevent) * thread->nevents); - - thread->kqueue_fd = kqueue(); - if (thread->kqueue_fd == -1) { - result = isc__errno2result(errno); - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "kqueue failed: %s", - strbuf); - isc_mem_put(thread->manager->mctx, thread->events, - sizeof(struct kevent) * thread->nevents); - return (result); - } - - result = watch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - if (result != ISC_R_SUCCESS) { - close(thread->kqueue_fd); - isc_mem_put(thread->manager->mctx, thread->events, - sizeof(struct kevent) * thread->nevents); - } - return (result); - -#elif defined(USE_EPOLL) - thread->nevents = ISC_SOCKET_MAXEVENTS; - thread->epoll_events = - isc_mem_get(thread->manager->mctx, - 
(thread->manager->maxsocks * sizeof(uint32_t))); - - memset(thread->epoll_events, 0, - thread->manager->maxsocks * sizeof(uint32_t)); - - thread->events = - isc_mem_get(thread->manager->mctx, - sizeof(struct epoll_event) * thread->nevents); - - thread->epoll_fd = epoll_create(thread->nevents); - if (thread->epoll_fd == -1) { - result = isc__errno2result(errno); - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "epoll_create failed: %s", - strbuf); - return (result); - } - - result = watch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - return (result); - -#elif defined(USE_DEVPOLL) - thread->nevents = ISC_SOCKET_MAXEVENTS; - result = isc_resource_getcurlimit(isc_resource_openfiles, - &thread->open_max); - if (result != ISC_R_SUCCESS) { - thread->open_max = 64; - } - thread->calls = 0; - thread->events = isc_mem_get(thread->manager->mctx, - sizeof(struct pollfd) * thread->nevents); - - /* - * Note: fdpollinfo should be able to support all possible FDs, so - * it must have maxsocks entries (not nevents). 
- */ - thread->fdpollinfo = - isc_mem_get(thread->manager->mctx, - sizeof(pollinfo_t) * thread->manager->maxsocks); - memset(thread->fdpollinfo, 0, - sizeof(pollinfo_t) * thread->manager->maxsocks); - thread->devpoll_fd = open("/dev/poll", O_RDWR); - if (thread->devpoll_fd == -1) { - result = isc__errno2result(errno); - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "open(/dev/poll) failed: %s", strbuf); - isc_mem_put(thread->manager->mctx, thread->events, - sizeof(struct pollfd) * thread->nevents); - isc_mem_put(thread->manager->mctx, thread->fdpollinfo, - sizeof(pollinfo_t) * thread->manager->maxsocks); - return (result); - } - result = watch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - if (result != ISC_R_SUCCESS) { - close(thread->devpoll_fd); - isc_mem_put(thread->manager->mctx, thread->events, - sizeof(struct pollfd) * thread->nevents); - isc_mem_put(thread->manager->mctx, thread->fdpollinfo, - sizeof(pollinfo_t) * thread->manager->maxsocks); - return (result); - } - - return (ISC_R_SUCCESS); -#elif defined(USE_SELECT) - UNUSED(result); - -#if ISC_SOCKET_MAXSOCKETS > FD_SETSIZE - /* - * Note: this code should also cover the case of MAXSOCKETS <= - * FD_SETSIZE, but we separate the cases to avoid possible portability - * issues regarding howmany() and the actual representation of fd_set. 
- */ - thread->fd_bufsize = howmany(manager->maxsocks, NFDBITS) * - sizeof(fd_mask); -#else /* if ISC_SOCKET_MAXSOCKETS > FD_SETSIZE */ - thread->fd_bufsize = sizeof(fd_set); -#endif /* if ISC_SOCKET_MAXSOCKETS > FD_SETSIZE */ - - thread->read_fds = isc_mem_get(thread->manager->mctx, - thread->fd_bufsize); - thread->read_fds_copy = isc_mem_get(thread->manager->mctx, - thread->fd_bufsize); - thread->write_fds = isc_mem_get(thread->manager->mctx, - thread->fd_bufsize); - thread->write_fds_copy = isc_mem_get(thread->manager->mctx, - thread->fd_bufsize); - memset(thread->read_fds, 0, thread->fd_bufsize); - memset(thread->write_fds, 0, thread->fd_bufsize); - - (void)watch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - thread->maxfd = thread->pipe_fds[0]; - - return (ISC_R_SUCCESS); -#endif /* USE_KQUEUE */ -} - -static void -cleanup_thread(isc_mem_t *mctx, isc__socketthread_t *thread) { - isc_result_t result; - int i; - - result = unwatch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - if (result != ISC_R_SUCCESS) { - UNEXPECTED_ERROR(__FILE__, __LINE__, "epoll_ctl(DEL) failed"); - } -#ifdef USE_KQUEUE - close(thread->kqueue_fd); - isc_mem_put(mctx, thread->events, - sizeof(struct kevent) * thread->nevents); -#elif defined(USE_EPOLL) - close(thread->epoll_fd); - - isc_mem_put(mctx, thread->events, - sizeof(struct epoll_event) * thread->nevents); -#elif defined(USE_DEVPOLL) - close(thread->devpoll_fd); - isc_mem_put(mctx, thread->events, - sizeof(struct pollfd) * thread->nevents); - isc_mem_put(mctx, thread->fdpollinfo, - sizeof(pollinfo_t) * thread->manager->maxsocks); -#elif defined(USE_SELECT) - if (thread->read_fds != NULL) { - isc_mem_put(mctx, thread->read_fds, thread->fd_bufsize); - } - if (thread->read_fds_copy != NULL) { - isc_mem_put(mctx, thread->read_fds_copy, thread->fd_bufsize); - } - if (thread->write_fds != NULL) { - isc_mem_put(mctx, thread->write_fds, thread->fd_bufsize); - } - if (thread->write_fds_copy != NULL) { - isc_mem_put(mctx, 
thread->write_fds_copy, thread->fd_bufsize); - } -#endif /* USE_KQUEUE */ - for (i = 0; i < (int)thread->manager->maxsocks; i++) { - if (thread->fdstate[i] == CLOSE_PENDING) { - /* no need to lock */ - (void)close(i); - } - } - -#if defined(USE_EPOLL) - isc_mem_put(thread->manager->mctx, thread->epoll_events, - thread->manager->maxsocks * sizeof(uint32_t)); -#endif /* if defined(USE_EPOLL) */ - isc_mem_put(thread->manager->mctx, thread->fds, - thread->manager->maxsocks * sizeof(isc_socket_t *)); - isc_mem_put(thread->manager->mctx, thread->fdstate, - thread->manager->maxsocks * sizeof(int)); - - for (i = 0; i < FDLOCK_COUNT; i++) { - isc_mutex_destroy(&thread->fdlock[i]); - } - isc_mem_put(thread->manager->mctx, thread->fdlock, - FDLOCK_COUNT * sizeof(isc_mutex_t)); -} - -isc_result_t -isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp, - unsigned int maxsocks, int nthreads) { - int i; - isc_socketmgr_t *manager; - - REQUIRE(managerp != NULL && *managerp == NULL); - - if (maxsocks == 0) { - maxsocks = ISC_SOCKET_MAXSOCKETS; - } - - manager = isc_mem_get(mctx, sizeof(*manager)); - - /* zero-clear so that necessary cleanup on failure will be easy */ - memset(manager, 0, sizeof(*manager)); - manager->maxsocks = maxsocks; - manager->reserved = 0; - manager->maxudp = 0; - manager->nthreads = nthreads; - manager->stats = NULL; - - manager->magic = SOCKET_MANAGER_MAGIC; - manager->mctx = NULL; - ISC_LIST_INIT(manager->socklist); - isc_mutex_init(&manager->lock); - isc_condition_init(&manager->shutdown_ok); - - /* - * Start up the select/poll thread. 
- */ - manager->threads = isc_mem_get(mctx, sizeof(isc__socketthread_t) * - manager->nthreads); - isc_mem_attach(mctx, &manager->mctx); - - for (i = 0; i < manager->nthreads; i++) { - manager->threads[i].manager = manager; - manager->threads[i].threadid = i; - setup_thread(&manager->threads[i]); - isc_thread_create(netthread, &manager->threads[i], - &manager->threads[i].thread); - char tname[1024]; - sprintf(tname, "isc-socket-%d", i); - isc_thread_setname(manager->threads[i].thread, tname); - } - - *managerp = manager; - - return (ISC_R_SUCCESS); -} - -isc_result_t -isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) { - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(nsockp != NULL); - - *nsockp = manager->maxsocks; - - return (ISC_R_SUCCESS); -} - -void -isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) { - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(ISC_LIST_EMPTY(manager->socklist)); - REQUIRE(manager->stats == NULL); - REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max); - - isc_stats_attach(stats, &manager->stats); -} - -void -isc__socketmgr_destroy(isc_socketmgr_t **managerp) { - isc_socketmgr_t *manager; - - /* - * Destroy a socket manager. - */ - - REQUIRE(managerp != NULL); - manager = *managerp; - REQUIRE(VALID_MANAGER(manager)); - - LOCK(&manager->lock); - - /* - * Wait for all sockets to be destroyed. - */ - while (!ISC_LIST_EMPTY(manager->socklist)) { - manager_log(manager, CREATION, "sockets exist"); - WAIT(&manager->shutdown_ok, &manager->lock); - } - - UNLOCK(&manager->lock); - - /* - * Here, poke our select/poll thread. Do this by closing the write - * half of the pipe, which will send EOF to the read half. - * This is currently a no-op in the non-threaded case. - */ - for (int i = 0; i < manager->nthreads; i++) { - select_poke(manager, i, 0, SELECT_POKE_SHUTDOWN); - } - - /* - * Wait for thread to exit. 
- */ - for (int i = 0; i < manager->nthreads; i++) { - isc_thread_join(manager->threads[i].thread, NULL); - cleanup_thread(manager->mctx, &manager->threads[i]); - } - /* - * Clean up. - */ - isc_mem_put(manager->mctx, manager->threads, - sizeof(isc__socketthread_t) * manager->nthreads); - (void)isc_condition_destroy(&manager->shutdown_ok); - - if (manager->stats != NULL) { - isc_stats_detach(&manager->stats); - } - isc_mutex_destroy(&manager->lock); - manager->magic = 0; - isc_mem_putanddetach(&manager->mctx, manager, sizeof(*manager)); - - *managerp = NULL; -} - -static isc_result_t -socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, - unsigned int flags) { - int io_state; - bool have_lock = false; - isc_task_t *ntask = NULL; - isc_result_t result = ISC_R_SUCCESS; - - dev->ev_sender = task; - - if (sock->type == isc_sockettype_udp) { - io_state = doio_recv(sock, dev); - } else { - LOCK(&sock->lock); - have_lock = true; - - if (ISC_LIST_EMPTY(sock->recv_list)) { - io_state = doio_recv(sock, dev); - } else { - io_state = DOIO_SOFT; - } - } - - switch (io_state) { - case DOIO_SOFT: - /* - * We couldn't read all or part of the request right now, so - * queue it. - * - * Attach to socket and to task - */ - isc_task_attach(task, &ntask); - dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; - - if (!have_lock) { - LOCK(&sock->lock); - have_lock = true; - } - - /* - * Enqueue the request. If the socket was previously not being - * watched, poke the watcher to start paying attention to it. 
- */ - bool do_poke = ISC_LIST_EMPTY(sock->recv_list); - ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link); - if (do_poke) { - select_poke(sock->manager, sock->threadid, sock->fd, - SELECT_POKE_READ); - } - - socket_log(sock, NULL, EVENT, - "socket_recv: event %p -> task %p", dev, ntask); - - if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) { - result = ISC_R_INPROGRESS; - } - break; - - case DOIO_EOF: - dev->result = ISC_R_EOF; - /* fallthrough */ - - case DOIO_HARD: - case DOIO_SUCCESS: - if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0) { - send_recvdone_event(sock, &dev); - } - break; - } - - if (have_lock) { - UNLOCK(&sock->lock); - } - - return (result); -} - -isc_result_t -isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, - isc_task_t *task, isc_taskaction_t action, void *arg) { - isc_socketevent_t *dev; - isc_socketmgr_t *manager; - - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(action != NULL); - - manager = sock->manager; - REQUIRE(VALID_MANAGER(manager)); - - INSIST(sock->bound); - - dev = allocate_socketevent(manager->mctx, sock, ISC_SOCKEVENT_RECVDONE, - action, arg); - if (dev == NULL) { - return (ISC_R_NOMEMORY); - } - - return (isc_socket_recv2(sock, region, minimum, task, dev, 0)); -} - -isc_result_t -isc_socket_recv2(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, - isc_task_t *task, isc_socketevent_t *event, - unsigned int flags) { - event->ev_sender = sock; - event->result = ISC_R_UNSET; - event->region = *region; - event->n = 0; - event->offset = 0; - event->attributes = 0; - - /* - * UDP sockets are always partial read. 
- */ - if (sock->type == isc_sockettype_udp) { - event->minimum = 1; - } else { - if (minimum == 0) { - event->minimum = region->length; - } else { - event->minimum = minimum; - } - } - - return (socket_recv(sock, event, task, flags)); -} - -static isc_result_t -socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, - unsigned int flags) { - int io_state; - bool have_lock = false; - isc_task_t *ntask = NULL; - isc_result_t result = ISC_R_SUCCESS; - - dev->ev_sender = task; - - set_dev_address(address, sock, dev); - if (pktinfo != NULL) { - dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; - dev->pktinfo = *pktinfo; - - if (!isc_sockaddr_issitelocal(&dev->address) && - !isc_sockaddr_islinklocal(&dev->address)) - { - socket_log(sock, NULL, TRACE, - "pktinfo structure provided, ifindex %u " - "(set to 0)", - pktinfo->ipi6_ifindex); - - /* - * Set the pktinfo index to 0 here, to let the - * kernel decide what interface it should send on. - */ - dev->pktinfo.ipi6_ifindex = 0; - } - } - - if (sock->type == isc_sockettype_udp) { - io_state = doio_send(sock, dev); - } else { - LOCK(&sock->lock); - have_lock = true; - - if (ISC_LIST_EMPTY(sock->send_list)) { - io_state = doio_send(sock, dev); - } else { - io_state = DOIO_SOFT; - } - } - - switch (io_state) { - case DOIO_SOFT: - /* - * We couldn't send all or part of the request right now, so - * queue it unless ISC_SOCKFLAG_NORETRY is set. - */ - if ((flags & ISC_SOCKFLAG_NORETRY) == 0) { - isc_task_attach(task, &ntask); - dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; - - if (!have_lock) { - LOCK(&sock->lock); - have_lock = true; - } - - /* - * Enqueue the request. If the socket was previously - * not being watched, poke the watcher to start - * paying attention to it. 
- */ - bool do_poke = ISC_LIST_EMPTY(sock->send_list); - ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link); - if (do_poke) { - select_poke(sock->manager, sock->threadid, - sock->fd, SELECT_POKE_WRITE); - } - socket_log(sock, NULL, EVENT, - "socket_send: event %p -> task %p", dev, - ntask); - - if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) { - result = ISC_R_INPROGRESS; - } - break; - } - - /* FALLTHROUGH */ - - case DOIO_HARD: - case DOIO_SUCCESS: - if (!have_lock) { - LOCK(&sock->lock); - have_lock = true; - } - if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0) { - send_senddone_event(sock, &dev); - } - break; - } - - if (have_lock) { - UNLOCK(&sock->lock); - } - - return (result); -} - -isc_result_t -isc_socket_send(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - isc_taskaction_t action, void *arg) { - /* - * REQUIRE() checking is performed in isc_socket_sendto(). - */ - return (isc_socket_sendto(sock, region, task, action, arg, NULL, NULL)); -} - -isc_result_t -isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - isc_taskaction_t action, void *arg, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) { - isc_socketevent_t *dev; - isc_socketmgr_t *manager; - - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(region != NULL); - REQUIRE(task != NULL); - REQUIRE(action != NULL); - - manager = sock->manager; - REQUIRE(VALID_MANAGER(manager)); - - INSIST(sock->bound); - - dev = allocate_socketevent(manager->mctx, sock, ISC_SOCKEVENT_SENDDONE, - action, arg); - if (dev == NULL) { - return (ISC_R_NOMEMORY); - } - - dev->region = *region; - - return (socket_send(sock, dev, task, address, pktinfo, 0)); -} - -isc_result_t -isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, - isc_socketevent_t *event, unsigned int flags) { - REQUIRE(VALID_SOCKET(sock)); - REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE | ISC_SOCKFLAG_NORETRY)) == - 0); - if ((flags & 
ISC_SOCKFLAG_NORETRY) != 0) { - REQUIRE(sock->type == isc_sockettype_udp); - } - event->ev_sender = sock; - event->result = ISC_R_UNSET; - event->region = *region; - event->n = 0; - event->offset = 0; - event->attributes &= ~ISC_SOCKEVENTATTR_ATTACHED; - - return (socket_send(sock, event, task, address, pktinfo, flags)); -} - -void -isc_socket_cleanunix(const isc_sockaddr_t *sockaddr, bool active) { - int s; - struct stat sb; - char strbuf[ISC_STRERRORSIZE]; - - if (sockaddr->type.sa.sa_family != AF_UNIX) { - return; - } - -#ifndef S_ISSOCK -#if defined(S_IFMT) && defined(S_IFSOCK) -#define S_ISSOCK(mode) ((mode & S_IFMT) == S_IFSOCK) -#elif defined(_S_IFMT) && defined(S_IFSOCK) -#define S_ISSOCK(mode) ((mode & _S_IFMT) == S_IFSOCK) -#endif /* if defined(S_IFMT) && defined(S_IFSOCK) */ -#endif /* ifndef S_ISSOCK */ - -#ifndef S_ISFIFO -#if defined(S_IFMT) && defined(S_IFIFO) -#define S_ISFIFO(mode) ((mode & S_IFMT) == S_IFIFO) -#elif defined(_S_IFMT) && defined(S_IFIFO) -#define S_ISFIFO(mode) ((mode & _S_IFMT) == S_IFIFO) -#endif /* if defined(S_IFMT) && defined(S_IFIFO) */ -#endif /* ifndef S_ISFIFO */ - -#if !defined(S_ISFIFO) && !defined(S_ISSOCK) -/* cppcheck-suppress preprocessorErrorDirective */ -#error \ - You need to define S_ISFIFO and S_ISSOCK as appropriate for your platform. See . -#endif /* if !defined(S_ISFIFO) && !defined(S_ISSOCK) */ - -#ifndef S_ISFIFO -#define S_ISFIFO(mode) 0 -#endif /* ifndef S_ISFIFO */ - -#ifndef S_ISSOCK -#define S_ISSOCK(mode) 0 -#endif /* ifndef S_ISSOCK */ - - if (stat(sockaddr->type.sunix.sun_path, &sb) < 0) { - switch (errno) { - case ENOENT: - if (active) { /* We exited cleanly last time */ - break; - } - /* FALLTHROUGH */ - default: - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, - active ? 
ISC_LOG_ERROR : ISC_LOG_WARNING, - "isc_socket_cleanunix: stat(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - return; - } - } else { - if (!(S_ISSOCK(sb.st_mode) || S_ISFIFO(sb.st_mode))) { - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, - active ? ISC_LOG_ERROR : ISC_LOG_WARNING, - "isc_socket_cleanunix: %s: not a socket", - sockaddr->type.sunix.sun_path); - return; - } - } - - if (active) { - if (unlink(sockaddr->type.sunix.sun_path) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "isc_socket_cleanunix: unlink(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - } - return; - } - - s = socket(AF_UNIX, SOCK_STREAM, 0); - if (s < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, - "isc_socket_cleanunix: socket(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - return; - } - - if (connect(s, (const struct sockaddr *)&sockaddr->type.sunix, - sizeof(sockaddr->type.sunix)) < 0) - { - switch (errno) { - case ECONNREFUSED: - case ECONNRESET: - if (unlink(sockaddr->type.sunix.sun_path) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write( - isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, - "isc_socket_cleanunix: " - "unlink(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - } - break; - default: - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, - "isc_socket_cleanunix: connect(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - break; - } - } - close(s); -} - -isc_result_t -isc_socket_permunix(const isc_sockaddr_t *sockaddr, uint32_t perm, - uint32_t owner, uint32_t group) { - isc_result_t result = ISC_R_SUCCESS; - char strbuf[ISC_STRERRORSIZE]; - char path[sizeof(sockaddr->type.sunix.sun_path)]; -#ifdef 
NEED_SECURE_DIRECTORY - char *slash; -#endif /* ifdef NEED_SECURE_DIRECTORY */ - - REQUIRE(sockaddr->type.sa.sa_family == AF_UNIX); - INSIST(strlen(sockaddr->type.sunix.sun_path) < sizeof(path)); - strlcpy(path, sockaddr->type.sunix.sun_path, sizeof(path)); - -#ifdef NEED_SECURE_DIRECTORY - slash = strrchr(path, '/'); - if (slash != NULL) { - if (slash != path) { - *slash = '\0'; - } else { - strlcpy(path, "/", sizeof(path)); - } - } else { - strlcpy(path, ".", sizeof(path)); - } -#endif /* ifdef NEED_SECURE_DIRECTORY */ - - if (chmod(path, perm) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "isc_socket_permunix: chmod(%s, %d): %s", path, - perm, strbuf); - result = ISC_R_FAILURE; - } - if (chown(path, owner, group) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "isc_socket_permunix: chown(%s, %d, %d): %s", - path, owner, group, strbuf); - result = ISC_R_FAILURE; - } - return (result); -} - -isc_result_t -isc_socket_bind(isc_socket_t *sock, const isc_sockaddr_t *sockaddr, - isc_socket_options_t options) { - char strbuf[ISC_STRERRORSIZE]; - int on = 1; - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - - INSIST(!sock->bound); - - if (sock->pf != sockaddr->type.sa.sa_family) { - UNLOCK(&sock->lock); - return (ISC_R_FAMILYMISMATCH); - } - - /* - * Only set SO_REUSEADDR when we want a specific port. 
- */ -#ifdef AF_UNIX - if (sock->pf == AF_UNIX) { - goto bind_socket; - } -#endif /* ifdef AF_UNIX */ - if ((options & ISC_SOCKET_REUSEADDRESS) != 0 && - isc_sockaddr_getport(sockaddr) != (in_port_t)0) - { - if (setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, - sizeof(on)) < 0) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d) failed", sock->fd); - } -#if defined(__FreeBSD_kernel__) && defined(SO_REUSEPORT_LB) - if (setsockopt(sock->fd, SOL_SOCKET, SO_REUSEPORT_LB, - (void *)&on, sizeof(on)) < 0) - { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d) failed", sock->fd); - } -#elif defined(__linux__) && defined(SO_REUSEPORT) - if (setsockopt(sock->fd, SOL_SOCKET, SO_REUSEPORT, (void *)&on, - sizeof(on)) < 0) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d) failed", sock->fd); - } -#endif /* if defined(__FreeBSD_kernel__) && defined(SO_REUSEPORT_LB) */ - /* Press on... */ - } -#ifdef AF_UNIX -bind_socket: -#endif /* ifdef AF_UNIX */ - if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) { - inc_stats(sock->manager->stats, - sock->statsindex[STATID_BINDFAIL]); - - UNLOCK(&sock->lock); - switch (errno) { - case EACCES: - return (ISC_R_NOPERM); - case EADDRNOTAVAIL: - return (ISC_R_ADDRNOTAVAIL); - case EADDRINUSE: - return (ISC_R_ADDRINUSE); - case EINVAL: - return (ISC_R_BOUND); - default: - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s", - strbuf); - return (ISC_R_UNEXPECTED); - } - } - - socket_log(sock, sockaddr, TRACE, "bound"); - sock->bound = 1; - - UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); -} - -/* - * Enable this only for specific OS versions, and only when they have repaired - * their problems with it. Until then, this is is broken and needs to be - * disabled by default. See RT22589 for details. 
- */ -#undef ENABLE_ACCEPTFILTER - -isc_result_t -isc_socket_filter(isc_socket_t *sock, const char *filter) { -#if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) - char strbuf[ISC_STRERRORSIZE]; - struct accept_filter_arg afa; -#else /* if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) */ - UNUSED(sock); - UNUSED(filter); -#endif /* if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) */ - - REQUIRE(VALID_SOCKET(sock)); - -#if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) - bzero(&afa, sizeof(afa)); - strlcpy(afa.af_name, filter, sizeof(afa.af_name)); - if (setsockopt(sock->fd, SOL_SOCKET, SO_ACCEPTFILTER, &afa, - sizeof(afa)) == -1) { - strerror_r(errno, strbuf, sizeof(strbuf)); - socket_log(sock, NULL, CREATION, - "setsockopt(SO_ACCEPTFILTER): %s", strbuf); - return (ISC_R_FAILURE); - } - return (ISC_R_SUCCESS); -#else /* if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) */ - return (ISC_R_NOTIMPLEMENTED); -#endif /* if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) */ -} - -/* - * Try enabling TCP Fast Open for a given socket if the OS supports it. - */ -static void -set_tcp_fastopen(isc_socket_t *sock, unsigned int backlog) { -#if defined(ENABLE_TCP_FASTOPEN) && defined(TCP_FASTOPEN) - char strbuf[ISC_STRERRORSIZE]; - -/* - * FreeBSD, as of versions 10.3 and 11.0, defines TCP_FASTOPEN while also - * shipping a default kernel without TFO support, so we special-case it by - * performing an additional runtime check for TFO support using sysctl to - * prevent setsockopt() errors from being logged. - */ -#if defined(__FreeBSD__) && defined(HAVE_SYSCTLBYNAME) -#define SYSCTL_TFO "net.inet.tcp.fastopen.enabled" - unsigned int enabled; - size_t enabledlen = sizeof(enabled); - static bool tfo_notice_logged = false; - - if (sysctlbyname(SYSCTL_TFO, &enabled, &enabledlen, NULL, 0) < 0) { - /* - * This kernel does not support TCP Fast Open. There is - * nothing more we can do. 
- */ - return; - } else if (enabled == 0) { - /* - * This kernel does support TCP Fast Open, but it is disabled - * by sysctl. Notify the user, but do not nag. - */ - if (!tfo_notice_logged) { - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_NOTICE, - "TCP_FASTOPEN support is disabled by " - "sysctl (" SYSCTL_TFO " = 0)"); - tfo_notice_logged = true; - } - return; - } -#endif /* if defined(__FreeBSD__) && defined(HAVE_SYSCTLBYNAME) */ - -#ifdef __APPLE__ - backlog = 1; -#else /* ifdef __APPLE__ */ - backlog = backlog / 2; - if (backlog == 0) { - backlog = 1; - } -#endif /* ifdef __APPLE__ */ - if (setsockopt(sock->fd, IPPROTO_TCP, TCP_FASTOPEN, (void *)&backlog, - sizeof(backlog)) < 0) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, TCP_FASTOPEN) failed with %s", - sock->fd, strbuf); - /* TCP_FASTOPEN is experimental so ignore failures */ - } -#else /* if defined(ENABLE_TCP_FASTOPEN) && defined(TCP_FASTOPEN) */ - UNUSED(sock); - UNUSED(backlog); -#endif /* if defined(ENABLE_TCP_FASTOPEN) && defined(TCP_FASTOPEN) */ -} - -/* - * Set up to listen on a given socket. We do this by creating an internal - * event that will be dispatched when the socket has read activity. The - * watcher will send the internal event to the task when there is a new - * connection. - * - * Unlike in read, we don't preallocate a done event here. Every time there - * is a new connection we'll have to allocate a new one anyway, so we might - * as well keep things simple rather than having to track them. 
- */ -isc_result_t -isc_socket_listen(isc_socket_t *sock, unsigned int backlog) { - char strbuf[ISC_STRERRORSIZE]; - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - - REQUIRE(!sock->listener); - REQUIRE(sock->bound); - REQUIRE(sock->type == isc_sockettype_tcp || - sock->type == isc_sockettype_unix); - - if (backlog == 0) { - backlog = SOMAXCONN; - } - - if (listen(sock->fd, (int)backlog) < 0) { - UNLOCK(&sock->lock); - strerror_r(errno, strbuf, sizeof(strbuf)); - - UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf); - - return (ISC_R_UNEXPECTED); - } - - set_tcp_fastopen(sock, backlog); - - sock->listener = 1; - - UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); -} - -/* - * This should try to do aggressive accept() XXXMLG - */ -isc_result_t -isc_socket_accept(isc_socket_t *sock, isc_task_t *task, isc_taskaction_t action, - void *arg) { - isc_socket_newconnev_t *dev; - isc_socketmgr_t *manager; - isc_task_t *ntask = NULL; - isc_socket_t *nsock; - isc_result_t result; - bool do_poke = false; - - REQUIRE(VALID_SOCKET(sock)); - manager = sock->manager; - REQUIRE(VALID_MANAGER(manager)); - - LOCK(&sock->lock); - - REQUIRE(sock->listener); - - /* - * Sender field is overloaded here with the task we will be sending - * this event to. Just before the actual event is delivered the - * actual ev_sender will be touched up to be the socket. - */ - dev = (isc_socket_newconnev_t *)isc_event_allocate( - manager->mctx, task, ISC_SOCKEVENT_NEWCONN, action, arg, - sizeof(*dev)); - ISC_LINK_INIT(dev, ev_link); - - result = allocate_socket(manager, sock->type, &nsock); - if (result != ISC_R_SUCCESS) { - isc_event_free(ISC_EVENT_PTR(&dev)); - UNLOCK(&sock->lock); - return (result); - } - - /* - * Attach to socket and to task. 
- */ - isc_task_attach(task, &ntask); - if (isc_task_exiting(ntask)) { - free_socket(&nsock); - isc_task_detach(&ntask); - isc_event_free(ISC_EVENT_PTR(&dev)); - UNLOCK(&sock->lock); - return (ISC_R_SHUTTINGDOWN); - } - isc_refcount_increment0(&nsock->references); - nsock->statsindex = sock->statsindex; - - dev->ev_sender = ntask; - dev->newsocket = nsock; - - /* - * Poke watcher here. We still have the socket locked, so there - * is no race condition. We will keep the lock for such a short - * bit of time waking it up now or later won't matter all that much. - */ - do_poke = ISC_LIST_EMPTY(sock->accept_list); - ISC_LIST_ENQUEUE(sock->accept_list, dev, ev_link); - if (do_poke) { - select_poke(manager, sock->threadid, sock->fd, - SELECT_POKE_ACCEPT); - } - UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); -} - -isc_result_t -isc_socket_connect(isc_socket_t *sock, const isc_sockaddr_t *addr, - isc_task_t *task, isc_taskaction_t action, void *arg) { - isc_socket_connev_t *dev; - isc_task_t *ntask = NULL; - isc_socketmgr_t *manager; - int cc; - char strbuf[ISC_STRERRORSIZE]; - char addrbuf[ISC_SOCKADDR_FORMATSIZE]; - - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(addr != NULL); - REQUIRE(task != NULL); - REQUIRE(action != NULL); - - manager = sock->manager; - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(addr != NULL); - - if (isc_sockaddr_ismulticast(addr)) { - return (ISC_R_MULTICAST); - } - - LOCK(&sock->lock); - - dev = (isc_socket_connev_t *)isc_event_allocate( - manager->mctx, sock, ISC_SOCKEVENT_CONNECT, action, arg, - sizeof(*dev)); - ISC_LINK_INIT(dev, ev_link); - - if (sock->connecting) { - INSIST(isc_sockaddr_equal(&sock->peer_address, addr)); - goto queue; - } - - if (sock->connected) { - INSIST(isc_sockaddr_equal(&sock->peer_address, addr)); - dev->result = ISC_R_SUCCESS; - isc_task_sendto(task, ISC_EVENT_PTR(&dev), sock->threadid); - - UNLOCK(&sock->lock); - - return (ISC_R_SUCCESS); - } - - /* - * Try to do the connect right away, as there can be only one - * 
outstanding, and it might happen to complete. - */ - sock->peer_address = *addr; - cc = connect(sock->fd, &addr->type.sa, addr->length); - if (cc < 0) { - /* - * The socket is nonblocking and the connection cannot be - * completed immediately. It is possible to select(2) or - * poll(2) for completion by selecting the socket for writing. - * After select(2) indicates writability, use getsockopt(2) to - * read the SO_ERROR option at level SOL_SOCKET to determine - * whether connect() completed successfully (SO_ERROR is zero) - * or unsuccessfully (SO_ERROR is one of the usual error codes - * listed here, explaining the reason for the failure). - */ - if (sock->type == isc_sockettype_udp && errno == EINPROGRESS) { - cc = 0; - goto success; - } - if (SOFT_ERROR(errno) || errno == EINPROGRESS) { - goto queue; - } - - switch (errno) { -#define ERROR_MATCH(a, b) \ - case a: \ - dev->result = b; \ - goto err_exit; - ERROR_MATCH(EACCES, ISC_R_NOPERM); - ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED); - ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH); -#ifdef EHOSTDOWN - ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH); -#endif /* ifdef EHOSTDOWN */ - ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH); - ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES); - ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); - ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); - ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT); - ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET); -#undef ERROR_MATCH - } - - sock->connected = 0; - - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_sockaddr_format(addr, addrbuf, sizeof(addrbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "connect(%s) %d/%s", - addrbuf, errno, strbuf); - - UNLOCK(&sock->lock); - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECTFAIL]); - isc_event_free(ISC_EVENT_PTR(&dev)); - return (ISC_R_UNEXPECTED); - - err_exit: - sock->connected = 0; - isc_task_sendto(task, ISC_EVENT_PTR(&dev), 
sock->threadid); - - UNLOCK(&sock->lock); - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECTFAIL]); - return (ISC_R_SUCCESS); - } - - /* - * If connect completed, fire off the done event. - */ -success: - if (cc == 0) { - sock->connected = 1; - sock->bound = 1; - dev->result = ISC_R_SUCCESS; - isc_task_sendto(task, ISC_EVENT_PTR(&dev), sock->threadid); - - UNLOCK(&sock->lock); - - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECT]); - - return (ISC_R_SUCCESS); - } - -queue: - - /* - * Attach to task. - */ - isc_task_attach(task, &ntask); - - dev->ev_sender = ntask; - - /* - * Poke watcher here. We still have the socket locked, so there - * is no race condition. We will keep the lock for such a short - * bit of time waking it up now or later won't matter all that much. - */ - bool do_poke = ISC_LIST_EMPTY(sock->connect_list); - ISC_LIST_ENQUEUE(sock->connect_list, dev, ev_link); - if (do_poke && !sock->connecting) { - sock->connecting = 1; - select_poke(manager, sock->threadid, sock->fd, - SELECT_POKE_CONNECT); - } - - UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); -} - -/* - * Called when a socket with a pending connect() finishes. - */ -static void -internal_connect(isc_socket_t *sock) { - isc_socket_connev_t *dev; - int cc; - isc_result_t result; - socklen_t optlen; - char strbuf[ISC_STRERRORSIZE]; - char peerbuf[ISC_SOCKADDR_FORMATSIZE]; - - INSIST(VALID_SOCKET(sock)); - REQUIRE(sock->fd >= 0); - - /* - * Get the first item off the connect list. - * If it is empty, unlock the socket and return. - */ - dev = ISC_LIST_HEAD(sock->connect_list); - if (dev == NULL) { - INSIST(!sock->connecting); - goto finish; - } - - INSIST(sock->connecting); - sock->connecting = 0; - - /* - * Get any possible error status here. 
- */ - optlen = sizeof(cc); - if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR, (void *)&cc, - (void *)&optlen) != 0) - { - cc = errno; - } else { - errno = cc; - } - - if (errno != 0) { - /* - * If the error is EAGAIN, just re-select on this - * fd and pretend nothing strange happened. - */ - if (SOFT_ERROR(errno) || errno == EINPROGRESS) { - sock->connecting = 1; - return; - } - - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECTFAIL]); - - /* - * Translate other errors into ISC_R_* flavors. - */ - switch (errno) { -#define ERROR_MATCH(a, b) \ - case a: \ - result = b; \ - break; - ERROR_MATCH(EACCES, ISC_R_NOPERM); - ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED); - ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH); -#ifdef EHOSTDOWN - ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH); -#endif /* ifdef EHOSTDOWN */ - ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH); - ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES); - ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); - ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); - ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT); - ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET); -#undef ERROR_MATCH - default: - result = ISC_R_UNEXPECTED; - isc_sockaddr_format(&sock->peer_address, peerbuf, - sizeof(peerbuf)); - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_connect: connect(%s) %s", - peerbuf, strbuf); - } - } else { - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECT]); - result = ISC_R_SUCCESS; - sock->connected = 1; - sock->bound = 1; - } - - do { - dev->result = result; - send_connectdone_event(sock, &dev); - dev = ISC_LIST_HEAD(sock->connect_list); - } while (dev != NULL); - -finish: - unwatch_fd(&sock->manager->threads[sock->threadid], sock->fd, - SELECT_POKE_CONNECT); -} - -isc_result_t -isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) { - isc_result_t result; - - 
REQUIRE(VALID_SOCKET(sock)); - REQUIRE(addressp != NULL); - - LOCK(&sock->lock); - - if (sock->connected) { - *addressp = sock->peer_address; - result = ISC_R_SUCCESS; - } else { - result = ISC_R_NOTCONNECTED; - } - - UNLOCK(&sock->lock); - - return (result); -} - -isc_result_t -isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) { - socklen_t len; - isc_result_t result; - char strbuf[ISC_STRERRORSIZE]; - - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(addressp != NULL); - - LOCK(&sock->lock); - - if (!sock->bound) { - result = ISC_R_NOTBOUND; - goto out; - } - - result = ISC_R_SUCCESS; - - len = sizeof(addressp->type); - if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s", strbuf); - result = ISC_R_UNEXPECTED; - goto out; - } - addressp->length = (unsigned int)len; - -out: - UNLOCK(&sock->lock); - - return (result); -} - -/* - * Run through the list of events on this socket, and cancel the ones - * queued for task "task" of type "how". "how" is a bitmask. - */ -void -isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { - REQUIRE(VALID_SOCKET(sock)); - - /* - * Quick exit if there is nothing to do. Don't even bother locking - * in this case. - */ - if (how == 0) { - return; - } - - LOCK(&sock->lock); - - /* - * All of these do the same thing, more or less. - * Each will: - * o If the internal event is marked as "posted" try to - * remove it from the task's queue. If this fails, mark it - * as canceled instead, and let the task clean it up later. - * o For each I/O request for that task of that type, post - * its done event with status of "ISC_R_CANCELED". - * o Reset any state needed. 
- */ - if (((how & ISC_SOCKCANCEL_RECV) != 0) && - !ISC_LIST_EMPTY(sock->recv_list)) { - isc_socketevent_t *dev; - isc_socketevent_t *next; - isc_task_t *current_task; - - dev = ISC_LIST_HEAD(sock->recv_list); - - while (dev != NULL) { - current_task = dev->ev_sender; - next = ISC_LIST_NEXT(dev, ev_link); - - if ((task == NULL) || (task == current_task)) { - dev->result = ISC_R_CANCELED; - send_recvdone_event(sock, &dev); - } - dev = next; - } - } - - if (((how & ISC_SOCKCANCEL_SEND) != 0) && - !ISC_LIST_EMPTY(sock->send_list)) { - isc_socketevent_t *dev; - isc_socketevent_t *next; - isc_task_t *current_task; - - dev = ISC_LIST_HEAD(sock->send_list); - - while (dev != NULL) { - current_task = dev->ev_sender; - next = ISC_LIST_NEXT(dev, ev_link); - - if ((task == NULL) || (task == current_task)) { - dev->result = ISC_R_CANCELED; - send_senddone_event(sock, &dev); - } - dev = next; - } - } - - if (((how & ISC_SOCKCANCEL_ACCEPT) != 0) && - !ISC_LIST_EMPTY(sock->accept_list)) { - isc_socket_newconnev_t *dev; - isc_socket_newconnev_t *next; - isc_task_t *current_task; - - dev = ISC_LIST_HEAD(sock->accept_list); - while (dev != NULL) { - current_task = dev->ev_sender; - next = ISC_LIST_NEXT(dev, ev_link); - - if ((task == NULL) || (task == current_task)) { - ISC_LIST_UNLINK(sock->accept_list, dev, - ev_link); - - isc_refcount_decrementz( - &NEWCONNSOCK(dev)->references); - free_socket((isc_socket_t **)&dev->newsocket); - - dev->result = ISC_R_CANCELED; - dev->ev_sender = sock; - isc_task_sendtoanddetach(¤t_task, - ISC_EVENT_PTR(&dev), - sock->threadid); - } - - dev = next; - } - } - - if (((how & ISC_SOCKCANCEL_CONNECT) != 0) && - !ISC_LIST_EMPTY(sock->connect_list)) - { - isc_socket_connev_t *dev; - isc_socket_connev_t *next; - isc_task_t *current_task; - - INSIST(sock->connecting); - sock->connecting = 0; - - dev = ISC_LIST_HEAD(sock->connect_list); - - while (dev != NULL) { - current_task = dev->ev_sender; - next = ISC_LIST_NEXT(dev, ev_link); - - if ((task == NULL) 
|| (task == current_task)) { - dev->result = ISC_R_CANCELED; - send_connectdone_event(sock, &dev); - } - dev = next; - } - } - - UNLOCK(&sock->lock); -} - -isc_sockettype_t -isc_socket_gettype(isc_socket_t *sock) { - REQUIRE(VALID_SOCKET(sock)); - - return (sock->type); -} - -void -isc_socket_ipv6only(isc_socket_t *sock, bool yes) { -#if defined(IPV6_V6ONLY) - int onoff = yes ? 1 : 0; -#else /* if defined(IPV6_V6ONLY) */ - UNUSED(yes); - UNUSED(sock); -#endif /* if defined(IPV6_V6ONLY) */ - - REQUIRE(VALID_SOCKET(sock)); - -#ifdef IPV6_V6ONLY - if (sock->pf == AF_INET6) { - if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, - (void *)&onoff, sizeof(int)) < 0) - { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_V6ONLY) failed: " - "%s", - sock->fd, strbuf); - } - } -#endif /* ifdef IPV6_V6ONLY */ -} - -static void -setdscp(isc_socket_t *sock, isc_dscp_t dscp) { -#if defined(IP_TOS) || defined(IPV6_TCLASS) - int value = dscp << 2; -#endif /* if defined(IP_TOS) || defined(IPV6_TCLASS) */ - - sock->dscp = dscp; - -#ifdef IP_TOS - if (sock->pf == AF_INET) { - if (setsockopt(sock->fd, IPPROTO_IP, IP_TOS, (void *)&value, - sizeof(value)) < 0) { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IP_TOS, %.02x) " - "failed: %s", - sock->fd, value >> 2, strbuf); - } - } -#endif /* ifdef IP_TOS */ -#ifdef IPV6_TCLASS - if (sock->pf == AF_INET6) { - if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS, - (void *)&value, sizeof(value)) < 0) - { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_TCLASS, %.02x) " - "failed: %s", - sock->fd, dscp >> 2, strbuf); - } - } -#endif /* ifdef IPV6_TCLASS */ -} - -void -isc_socket_dscp(isc_socket_t *sock, isc_dscp_t dscp) { - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(dscp < 0x40); - 
-#if !defined(IP_TOS) && !defined(IPV6_TCLASS) - UNUSED(dscp); -#else /* if !defined(IP_TOS) && !defined(IPV6_TCLASS) */ - if (dscp < 0) { - return; - } - - /* The DSCP value must not be changed once it has been set. */ - if (isc_dscp_check_value != -1) { - INSIST(dscp == isc_dscp_check_value); - } -#endif /* if !defined(IP_TOS) && !defined(IPV6_TCLASS) */ - - setdscp(sock, dscp); -} - -isc_socketevent_t * -isc_socket_socketevent(isc_mem_t *mctx, void *sender, isc_eventtype_t eventtype, - isc_taskaction_t action, void *arg) { - return (allocate_socketevent(mctx, sender, eventtype, action, arg)); -} - -void -isc_socket_setname(isc_socket_t *sock, const char *name, void *tag) { - /* - * Name 'sock'. - */ - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - strlcpy(sock->name, name, sizeof(sock->name)); - sock->tag = tag; - UNLOCK(&sock->lock); -} - -const char * -isc_socket_getname(isc_socket_t *sock) { - return (sock->name); -} - -void * -isc_socket_gettag(isc_socket_t *sock) { - return (sock->tag); -} - -int -isc_socket_getfd(isc_socket_t *sock) { - return ((short)sock->fd); -} - -static isc_once_t hasreuseport_once = ISC_ONCE_INIT; -static bool hasreuseport = false; - -static void -init_hasreuseport(void) { -/* - * SO_REUSEPORT works very differently on *BSD and on Linux (because why not). - * We only want to use it on Linux, if it's available. 
- */ -#if (defined(SO_REUSEPORT) && defined(__linux__)) || \ - (defined(SO_REUSEPORT_LB) && defined(__FreeBSD_kernel__)) - int sock, yes = 1; - sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock < 0) { - sock = socket(AF_INET6, SOCK_DGRAM, 0); - if (sock < 0) { - return; - } - } - if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (void *)&yes, - sizeof(yes)) < 0) { - close(sock); - return; -#if defined(__FreeBSD_kernel__) - } else if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT_LB, (void *)&yes, - sizeof(yes)) < 0) -#else /* if defined(__FreeBSD_kernel__) */ - } else if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (void *)&yes, - sizeof(yes)) < 0) -#endif /* if defined(__FreeBSD_kernel__) */ - { - close(sock); - return; - } - hasreuseport = true; - close(sock); -#endif /* if (defined(SO_REUSEPORT) && defined(__linux__)) || \ - * (defined(SO_REUSEPORT_LB) && defined(__FreeBSD_kernel__)) */ -} - -bool -isc_socket_hasreuseport(void) { - RUNTIME_CHECK(isc_once_do(&hasreuseport_once, init_hasreuseport) == - ISC_R_SUCCESS); - return (hasreuseport); -} - -#if defined(HAVE_LIBXML2) || defined(HAVE_JSON_C) -static const char * -_socktype(isc_sockettype_t type) { - switch (type) { - case isc_sockettype_udp: - return ("udp"); - case isc_sockettype_tcp: - return ("tcp"); - case isc_sockettype_unix: - return ("unix"); - default: - return ("not-initialized"); - } -} -#endif /* if defined(HAVE_LIBXML2) || defined(HAVE_JSON_C) */ - -#ifdef HAVE_LIBXML2 -#define TRY0(a) \ - do { \ - xmlrc = (a); \ - if (xmlrc < 0) \ - goto error; \ - } while (0) -int -isc_socketmgr_renderxml(isc_socketmgr_t *mgr, void *writer0) { - isc_socket_t *sock = NULL; - char peerbuf[ISC_SOCKADDR_FORMATSIZE]; - isc_sockaddr_t addr; - socklen_t len; - int xmlrc; - xmlTextWriterPtr writer = (xmlTextWriterPtr)writer0; - - LOCK(&mgr->lock); - - TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets")); - sock = ISC_LIST_HEAD(mgr->socklist); - while (sock != NULL) { - LOCK(&sock->lock); - 
TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket")); - - TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "id")); - TRY0(xmlTextWriterWriteFormatString(writer, "%p", sock)); - TRY0(xmlTextWriterEndElement(writer)); - - if (sock->name[0] != 0) { - TRY0(xmlTextWriterStartElement(writer, - ISC_XMLCHAR "name")); - TRY0(xmlTextWriterWriteFormatString(writer, "%s", - sock->name)); - TRY0(xmlTextWriterEndElement(writer)); /* name */ - } - - TRY0(xmlTextWriterStartElement(writer, - ISC_XMLCHAR "references")); - TRY0(xmlTextWriterWriteFormatString( - writer, "%d", - (int)isc_refcount_current(&sock->references))); - TRY0(xmlTextWriterEndElement(writer)); - - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "type", - ISC_XMLCHAR _socktype(sock->type))); - - if (sock->connected) { - isc_sockaddr_format(&sock->peer_address, peerbuf, - sizeof(peerbuf)); - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "peer-address", - ISC_XMLCHAR peerbuf)); - } - - len = sizeof(addr); - if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) { - isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf)); - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "local-address", - ISC_XMLCHAR peerbuf)); - } - - TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "states")); - if (sock->listener) { - TRY0(xmlTextWriterWriteElement(writer, - ISC_XMLCHAR "state", - ISC_XMLCHAR "listener")); - } - if (sock->connected) { - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "state", - ISC_XMLCHAR "connected")); - } - if (sock->connecting) { - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "state", - ISC_XMLCHAR "connecting")); - } - if (sock->bound) { - TRY0(xmlTextWriterWriteElement(writer, - ISC_XMLCHAR "state", - ISC_XMLCHAR "bound")); - } - - TRY0(xmlTextWriterEndElement(writer)); /* states */ - - TRY0(xmlTextWriterEndElement(writer)); /* socket */ - - UNLOCK(&sock->lock); - sock = ISC_LIST_NEXT(sock, link); - } - TRY0(xmlTextWriterEndElement(writer)); /* sockets */ - 
-error: - if (sock != NULL) { - UNLOCK(&sock->lock); - } - - UNLOCK(&mgr->lock); - - return (xmlrc); -} -#endif /* HAVE_LIBXML2 */ - -#ifdef HAVE_JSON_C -#define CHECKMEM(m) \ - do { \ - if (m == NULL) { \ - result = ISC_R_NOMEMORY; \ - goto error; \ - } \ - } while (0) - -isc_result_t -isc_socketmgr_renderjson(isc_socketmgr_t *mgr, void *stats0) { - isc_result_t result = ISC_R_SUCCESS; - isc_socket_t *sock = NULL; - char peerbuf[ISC_SOCKADDR_FORMATSIZE]; - isc_sockaddr_t addr; - socklen_t len; - json_object *obj, *array = json_object_new_array(); - json_object *stats = (json_object *)stats0; - - CHECKMEM(array); - - LOCK(&mgr->lock); - - sock = ISC_LIST_HEAD(mgr->socklist); - while (sock != NULL) { - json_object *states, *entry = json_object_new_object(); - char buf[255]; - - CHECKMEM(entry); - json_object_array_add(array, entry); - - LOCK(&sock->lock); - - snprintf(buf, sizeof(buf), "%p", sock); - obj = json_object_new_string(buf); - CHECKMEM(obj); - json_object_object_add(entry, "id", obj); - - if (sock->name[0] != 0) { - obj = json_object_new_string(sock->name); - CHECKMEM(obj); - json_object_object_add(entry, "name", obj); - } - - obj = json_object_new_int( - (int)isc_refcount_current(&sock->references)); - CHECKMEM(obj); - json_object_object_add(entry, "references", obj); - - obj = json_object_new_string(_socktype(sock->type)); - CHECKMEM(obj); - json_object_object_add(entry, "type", obj); - - if (sock->connected) { - isc_sockaddr_format(&sock->peer_address, peerbuf, - sizeof(peerbuf)); - obj = json_object_new_string(peerbuf); - CHECKMEM(obj); - json_object_object_add(entry, "peer-address", obj); - } - - len = sizeof(addr); - if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) { - isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf)); - obj = json_object_new_string(peerbuf); - CHECKMEM(obj); - json_object_object_add(entry, "local-address", obj); - } - - states = json_object_new_array(); - CHECKMEM(states); - json_object_object_add(entry, "states", 
states); - - if (sock->listener) { - obj = json_object_new_string("listener"); - CHECKMEM(obj); - json_object_array_add(states, obj); - } - - if (sock->connected) { - obj = json_object_new_string("connected"); - CHECKMEM(obj); - json_object_array_add(states, obj); - } - - if (sock->connecting) { - obj = json_object_new_string("connecting"); - CHECKMEM(obj); - json_object_array_add(states, obj); - } - - if (sock->bound) { - obj = json_object_new_string("bound"); - CHECKMEM(obj); - json_object_array_add(states, obj); - } - - UNLOCK(&sock->lock); - sock = ISC_LIST_NEXT(sock, link); - } - - json_object_object_add(stats, "sockets", array); - array = NULL; - result = ISC_R_SUCCESS; - -error: - if (array != NULL) { - json_object_put(array); - } - - if (sock != NULL) { - UNLOCK(&sock->lock); - } - - UNLOCK(&mgr->lock); - - return (result); -} -#endif /* HAVE_JSON_C */ diff --git a/lib/isc/socket_p.h b/lib/isc/socket_p.h deleted file mode 100644 index c99d246d50..0000000000 --- a/lib/isc/socket_p.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (C) Internet Systems Consortium, Inc. ("ISC") - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, you can obtain one at https://mozilla.org/MPL/2.0/. - * - * See the COPYRIGHT file distributed with this work for additional - * information regarding copyright ownership. - */ - -#pragma once - -#include -#include -#include - -isc_result_t -isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp, - unsigned int maxsocks, int nthreads); -/*%< - * Create a socket manager. If "maxsocks" is non-zero, it specifies the - * maximum number of sockets that the created manager should handle. - * - * Notes: - * - *\li All memory will be allocated in memory context 'mctx'. - * - * Requires: - * - *\li 'mctx' is a valid memory context. - * - *\li 'managerp' points to a NULL isc_socketmgr_t. 
- * - * Ensures: - * - *\li '*managerp' is a valid isc_socketmgr_t. - * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_NOMEMORY - *\li #ISC_R_UNEXPECTED - *\li #ISC_R_NOTIMPLEMENTED - */ - -void -isc__socketmgr_destroy(isc_socketmgr_t **managerp); -/*%< - * Destroy a socket manager. - * - * Notes: - * - *\li This routine blocks until there are no sockets left in the manager, - * so if the caller holds any socket references using the manager, it - * must detach them before calling isc_socketmgr_destroy() or it will - * block forever. - * - * Requires: - * - *\li '*managerp' is a valid isc_socketmgr_t. - * - *\li All sockets managed by this manager are fully detached. - * - * Ensures: - * - *\li *managerp == NULL - * - *\li All resources used by the manager have been freed. - */ - -#include - -typedef struct isc_socketwait isc_socketwait_t; -int -isc__socketmgr_waitevents(isc_socketmgr_t *, struct timeval *, - isc_socketwait_t **); -isc_result_t -isc__socketmgr_dispatch(isc_socketmgr_t *, isc_socketwait_t *); diff --git a/lib/isc/tests/Makefile.am b/lib/isc/tests/Makefile.am index de488951ab..d84e167e1b 100644 --- a/lib/isc/tests/Makefile.am +++ b/lib/isc/tests/Makefile.am @@ -9,7 +9,6 @@ LDADD += \ check_LTLIBRARIES = libisctest.la libisctest_la_SOURCES = \ - ../socket_p.h \ isctest.c \ isctest.h \ uv_wrap.h @@ -40,7 +39,6 @@ check_PROGRAMS = \ safe_test \ siphash_test \ sockaddr_test \ - socket_test \ stats_test \ symtab_test \ task_test \ diff --git a/lib/isc/tests/isctest.c b/lib/isc/tests/isctest.c index 6975501ef7..af8fa5183f 100644 --- a/lib/isc/tests/isctest.c +++ b/lib/isc/tests/isctest.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -33,7 +32,6 @@ isc_mem_t *test_mctx = NULL; isc_log_t *test_lctx = NULL; isc_taskmgr_t *taskmgr = NULL; isc_timermgr_t *timermgr = NULL; -isc_socketmgr_t *socketmgr = NULL; isc_nm_t *netmgr = NULL; isc_task_t *maintask = NULL; int ncpus; @@ -61,8 +59,7 @@ cleanup_managers(void) { } 
isc_managers_destroy(netmgr == NULL ? NULL : &netmgr, taskmgr == NULL ? NULL : &taskmgr, - timermgr == NULL ? NULL : &timermgr, - socketmgr == NULL ? NULL : &socketmgr); + timermgr == NULL ? NULL : &timermgr); } static isc_result_t @@ -80,8 +77,8 @@ create_managers(unsigned int workers) { INSIST(workers != 0); isc_hp_init(6 * workers); - isc_managers_create(test_mctx, workers, 0, 0, &netmgr, &taskmgr, - &timermgr, &socketmgr); + isc_managers_create(test_mctx, workers, 0, &netmgr, &taskmgr, + &timermgr); CHECK(isc_task_create_bound(taskmgr, 0, &maintask, 0)); isc_taskmgr_setexcltask(taskmgr, maintask); diff --git a/lib/isc/tests/isctest.h b/lib/isc/tests/isctest.h index 74196f5861..f773559b2d 100644 --- a/lib/isc/tests/isctest.h +++ b/lib/isc/tests/isctest.h @@ -39,7 +39,6 @@ extern isc_mem_t *test_mctx; extern isc_log_t *test_lctx; extern isc_taskmgr_t *taskmgr; extern isc_timermgr_t *timermgr; -extern isc_socketmgr_t *socketmgr; extern isc_nm_t *netmgr; extern int ncpus; diff --git a/lib/isc/tests/socket_test.c b/lib/isc/tests/socket_test.c deleted file mode 100644 index a98fc11906..0000000000 --- a/lib/isc/tests/socket_test.c +++ /dev/null @@ -1,734 +0,0 @@ -/* - * Copyright (C) Internet Systems Consortium, Inc. ("ISC") - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, you can obtain one at https://mozilla.org/MPL/2.0/. - * - * See the COPYRIGHT file distributed with this work for additional - * information regarding copyright ownership. - */ - -/*! 
\file */ - -#if HAVE_CMOCKA -#include -#include /* IWYU pragma: keep */ -#include -#include -#include -#include -#include -#include -#include - -#define UNIT_TESTING -#include - -#include -#include -#include -#include - -#include "../socket_p.h" -#include "isctest.h" - -static bool recv_dscp; -static unsigned int recv_dscp_value; -static bool recv_trunc; -isc_socket_t *s1 = NULL, *s2 = NULL, *s3 = NULL; -isc_task_t *test_task = NULL; - -/* - * Helper functions - */ - -static int -_setup(void **state) { - isc_result_t result; - - UNUSED(state); - - result = isc_test_begin(NULL, true, 0); - assert_int_equal(result, ISC_R_SUCCESS); - - return (0); -} - -static int -_teardown(void **state) { - UNUSED(state); - - if (s1 != NULL) { - isc_socket_detach(&s1); - } - if (s2 != NULL) { - isc_socket_detach(&s2); - } - if (s3 != NULL) { - isc_socket_detach(&s3); - } - if (test_task != NULL) { - isc_task_detach(&test_task); - } - - isc_test_end(); - - return (0); -} - -typedef struct { - atomic_bool done; - atomic_uintptr_t socket; - isc_result_t result; -} completion_t; - -static void -completion_init(completion_t *completion) { - atomic_init(&completion->done, false); - atomic_init(&completion->socket, (uintptr_t)NULL); -} - -static void -accept_done(isc_task_t *task, isc_event_t *event) { - isc_socket_newconnev_t *nevent = (isc_socket_newconnev_t *)event; - completion_t *completion = event->ev_arg; - - UNUSED(task); - - completion->result = nevent->result; - atomic_store(&completion->done, true); - if (completion->result == ISC_R_SUCCESS) { - atomic_store(&completion->socket, (uintptr_t)nevent->newsocket); - } - - isc_event_free(&event); -} - -static void -event_done(isc_task_t *task, isc_event_t *event) { - isc_socketevent_t *sev = NULL; - isc_socket_connev_t *connev = NULL; - completion_t *completion = event->ev_arg; - UNUSED(task); - - switch (event->ev_type) { - case ISC_SOCKEVENT_RECVDONE: - case ISC_SOCKEVENT_SENDDONE: - sev = (isc_socketevent_t *)event; - 
completion->result = sev->result; - if ((sev->attributes & ISC_SOCKEVENTATTR_DSCP) != 0) { - recv_dscp = true; - recv_dscp_value = sev->dscp; - } else { - recv_dscp = false; - } - recv_trunc = ((sev->attributes & ISC_SOCKEVENTATTR_TRUNC) != 0); - break; - case ISC_SOCKEVENT_CONNECT: - connev = (isc_socket_connev_t *)event; - completion->result = connev->result; - break; - default: - assert_false(true); - } - atomic_store(&completion->done, true); - isc_event_free(&event); -} - -static void -waitfor(completion_t *completion) { - int i = 0; - while (!atomic_load(&completion->done) && i++ < 5000) { - isc_test_nap(10000); - } - assert_true(atomic_load(&completion->done)); -} - -static void -waitfor2(completion_t *c1, completion_t *c2) { - int i = 0; - - while (!(atomic_load(&c1->done) && atomic_load(&c2->done)) && - i++ < 5000) { - isc_test_nap(10000); - } - assert_true(atomic_load(&c1->done) && atomic_load(&c2->done)); -} - -/* - * Individual unit tests - */ - -/* Test UDP sendto/recv (IPv4) */ -static void -udp_sendto_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1, addr2; - struct in_addr in; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion; - isc_region_t r; - - UNUSED(state); - - in.s_addr = inet_addr("127.0.0.1"); - isc_sockaddr_fromin(&addr1, &in, 0); - isc_sockaddr_fromin(&addr2, &in, 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s1); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s1, &addr1, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s2); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s2, &addr2, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s2, &addr2); - assert_int_equal(result, ISC_R_SUCCESS); - 
assert_true(isc_sockaddr_getport(&addr2) != 0); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - completion_init(&completion); - result = isc_socket_sendto(s1, &r, test_task, event_done, &completion, - &addr2, NULL); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); -} - -/* Test UDP sendto/recv (IPv4) */ -static void -udp_dscp_v4_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1, addr2; - struct in_addr in; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion; - isc_region_t r; - isc_socketevent_t *socketevent; - - UNUSED(state); - - in.s_addr = inet_addr("127.0.0.1"); - isc_sockaddr_fromin(&addr1, &in, 0); - isc_sockaddr_fromin(&addr2, &in, 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s1); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s1, &addr1, ISC_SOCKET_REUSEADDRESS); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s2); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s2, &addr2, ISC_SOCKET_REUSEADDRESS); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s2, &addr2); - 
assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr2) != 0); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - completion_init(&completion); - - socketevent = isc_socket_socketevent( - test_mctx, s1, ISC_SOCKEVENT_SENDDONE, event_done, &completion); - assert_non_null(socketevent); - - if ((isc_net_probedscp() & ISC_NET_DSCPPKTV4) != 0) { - socketevent->dscp = 056; /* EF */ - socketevent->attributes |= ISC_SOCKEVENTATTR_DSCP; - } else if ((isc_net_probedscp() & ISC_NET_DSCPSETV4) != 0) { - isc_socket_dscp(s1, 056); /* EF */ - socketevent->dscp = 0; - socketevent->attributes &= ~ISC_SOCKEVENTATTR_DSCP; - } - - recv_dscp = false; - recv_dscp_value = 0; - - result = isc_socket_sendto2(s1, &r, test_task, &addr2, NULL, - socketevent, 0); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - - if ((isc_net_probedscp() & ISC_NET_DSCPRECVV4) != 0) { - assert_true(recv_dscp); - assert_int_equal(recv_dscp_value, 056); - } else { - assert_false(recv_dscp); - } -} - -/* Test UDP sendto/recv (IPv6) */ -static void -udp_dscp_v6_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1, addr2; - struct in6_addr in6; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion; - isc_region_t r; - isc_socketevent_t *socketevent; - int n; - - UNUSED(state); - - n = inet_pton(AF_INET6, "::1", &in6.s6_addr); - 
assert_true(n == 1); - isc_sockaddr_fromin6(&addr1, &in6, 0); - isc_sockaddr_fromin6(&addr2, &in6, 0); - - result = isc_socket_create(socketmgr, PF_INET6, isc_sockettype_udp, - &s1); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s1, &addr1, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_create(socketmgr, PF_INET6, isc_sockettype_udp, - &s2); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s2, &addr2, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s2, &addr2); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr2) != 0); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - completion_init(&completion); - - socketevent = isc_socket_socketevent( - test_mctx, s1, ISC_SOCKEVENT_SENDDONE, event_done, &completion); - assert_non_null(socketevent); - - if ((isc_net_probedscp() & ISC_NET_DSCPPKTV6) != 0) { - socketevent->dscp = 056; /* EF */ - socketevent->attributes = ISC_SOCKEVENTATTR_DSCP; - } else if ((isc_net_probedscp() & ISC_NET_DSCPSETV6) != 0) { - isc_socket_dscp(s1, 056); /* EF */ - } - - recv_dscp = false; - recv_dscp_value = 0; - - result = isc_socket_sendto2(s1, &r, test_task, &addr2, NULL, - socketevent, 0); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - 
assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - if ((isc_net_probedscp() & ISC_NET_DSCPRECVV6) != 0) { - assert_true(recv_dscp); - assert_int_equal(recv_dscp_value, 056); - } else { - assert_false(recv_dscp); - } -} - -/* Test TCP sendto/recv (IPv4) */ -static void -tcp_dscp_v4_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1; - struct in_addr in; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion, completion2; - isc_region_t r; - - UNUSED(state); - - in.s_addr = inet_addr("127.0.0.1"); - isc_sockaddr_fromin(&addr1, &in, 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_tcp, &s1); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_socket_bind(s1, &addr1, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_listen(s1, 3); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_tcp, &s2); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - completion_init(&completion2); - result = isc_socket_accept(s1, test_task, accept_done, &completion2); - assert_int_equal(result, ISC_R_SUCCESS); - - completion_init(&completion); - result = isc_socket_connect(s2, &addr1, test_task, event_done, - &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor2(&completion, &completion2); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_true(atomic_load(&completion2.done)); - assert_int_equal(completion2.result, ISC_R_SUCCESS); - s3 = (isc_socket_t *)atomic_load(&completion2.socket); - - isc_socket_dscp(s2, 056); /* EF */ - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); 
- r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - recv_dscp = false; - recv_dscp_value = 0; - - completion_init(&completion); - result = isc_socket_sendto(s2, &r, test_task, event_done, &completion, - NULL, NULL); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s3, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - - if ((isc_net_probedscp() & ISC_NET_DSCPRECVV4) != 0) { - if (recv_dscp) { - assert_int_equal(recv_dscp_value, 056); - } - } else { - assert_false(recv_dscp); - } -} - -/* Test TCP sendto/recv (IPv6) */ -static void -tcp_dscp_v6_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1; - struct in6_addr in6; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion, completion2; - isc_region_t r; - int n; - - UNUSED(state); - - n = inet_pton(AF_INET6, "::1", &in6.s6_addr); - assert_true(n == 1); - isc_sockaddr_fromin6(&addr1, &in6, 0); - - result = isc_socket_create(socketmgr, PF_INET6, isc_sockettype_tcp, - &s1); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_socket_bind(s1, &addr1, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_listen(s1, 3); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_socket_create(socketmgr, PF_INET6, isc_sockettype_tcp, - &s2); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - 
completion_init(&completion2); - result = isc_socket_accept(s1, test_task, accept_done, &completion2); - assert_int_equal(result, ISC_R_SUCCESS); - - completion_init(&completion); - result = isc_socket_connect(s2, &addr1, test_task, event_done, - &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor2(&completion, &completion2); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_true(atomic_load(&completion2.done)); - assert_int_equal(completion2.result, ISC_R_SUCCESS); - s3 = (isc_socket_t *)atomic_load(&completion2.socket); - - isc_socket_dscp(s2, 056); /* EF */ - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - recv_dscp = false; - recv_dscp_value = 0; - - completion_init(&completion); - result = isc_socket_sendto(s2, &r, test_task, event_done, &completion, - NULL, NULL); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s3, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - - if ((isc_net_probedscp() & ISC_NET_DSCPRECVV6) != 0) { - /* - * IPV6_RECVTCLASS is undefined for TCP however - * if we do get it it should be the value we set. - */ - if (recv_dscp) { - assert_int_equal(recv_dscp_value, 056); - } - } else { - assert_false(recv_dscp); - } -} - -/* probe dscp capabilities */ -static void -net_probedscp_test(void **state) { - unsigned int n; - - UNUSED(state); - - n = isc_net_probedscp(); - assert_true((n & ~ISC_NET_DSCPALL) == 0); - - /* ISC_NET_DSCPSETV4 MUST be set if any is set. 
*/ - if (n & (ISC_NET_DSCPPKTV4 | ISC_NET_DSCPRECVV4)) { - assert_true((n & ISC_NET_DSCPSETV4) != 0); - } - - /* ISC_NET_DSCPSETV6 MUST be set if any is set. */ - if (n & (ISC_NET_DSCPPKTV6 | ISC_NET_DSCPRECVV6)) { - assert_true((n & ISC_NET_DSCPSETV6) != 0); - } - -#if 0 - fprintf(stdout,"IPv4:%s%s%s\n", - (n & ISC_NET_DSCPSETV4) ? " set" : "none", - (n & ISC_NET_DSCPPKTV4) ? " packet" : "", - (n & ISC_NET_DSCPRECVV4) ? " receive" : ""); - - fprintf(stdout,"IPv6:%s%s%s\n", - (n & ISC_NET_DSCPSETV6) ? " set" : "none", - (n & ISC_NET_DSCPPKTV6) ? " packet" : "", - (n & ISC_NET_DSCPRECVV6) ? " receive" : ""); -#endif /* if 0 */ -} - -/* Test UDP truncation detection */ -static void -udp_trunc_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1, addr2; - struct in_addr in; - char sendbuf[BUFSIZ * 2], recvbuf[BUFSIZ]; - completion_t completion; - isc_region_t r; - isc_socketevent_t *socketevent; - - UNUSED(state); - - in.s_addr = inet_addr("127.0.0.1"); - isc_sockaddr_fromin(&addr1, &in, 0); - isc_sockaddr_fromin(&addr2, &in, 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s1); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s1, &addr1, ISC_SOCKET_REUSEADDRESS); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s2); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s2, &addr2, ISC_SOCKET_REUSEADDRESS); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s2, &addr2); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr2) != 0); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - /* - * Send a message that will not be truncated. 
- */ - memset(sendbuf, 0xff, sizeof(sendbuf)); - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - completion_init(&completion); - - socketevent = isc_socket_socketevent( - test_mctx, s1, ISC_SOCKEVENT_SENDDONE, event_done, &completion); - assert_non_null(socketevent); - - result = isc_socket_sendto2(s1, &r, test_task, &addr2, NULL, - socketevent, 0); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - recv_trunc = false; - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - assert_false(recv_trunc); - - /* - * Send a message that will be truncated. 
- */ - memset(sendbuf, 0xff, sizeof(sendbuf)); - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = sizeof(sendbuf); - - completion_init(&completion); - - socketevent = isc_socket_socketevent( - test_mctx, s1, ISC_SOCKEVENT_SENDDONE, event_done, &completion); - assert_non_null(socketevent); - - result = isc_socket_sendto2(s1, &r, test_task, &addr2, NULL, - socketevent, 0); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - recv_trunc = false; - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - assert_true(recv_trunc); -} - -/* - * Main - */ -int -main(void) { - const struct CMUnitTest tests[] = { - cmocka_unit_test_setup_teardown(udp_sendto_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(tcp_dscp_v4_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(tcp_dscp_v6_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(udp_dscp_v4_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(udp_dscp_v6_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(net_probedscp_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(udp_trunc_test, _setup, - _teardown), - }; - - return (cmocka_run_group_tests(tests, NULL, NULL)); -} - -#else /* HAVE_CMOCKA */ - -#include - -int -main(void) { - printf("1..0 # Skipped: cmocka not available\n"); - return (SKIPPED_TEST_EXIT_CODE); -} - -#endif /* if HAVE_CMOCKA */ diff --git a/lib/isc/tests/task_test.c b/lib/isc/tests/task_test.c index 2be810d179..1b6d75d47d 100644 --- a/lib/isc/tests/task_test.c +++ 
b/lib/isc/tests/task_test.c @@ -721,7 +721,7 @@ manytasks(void **state) { isc_mem_debugging = ISC_MEM_DEBUGRECORD; isc_mem_create(&mctx); - isc_managers_create(mctx, 4, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 4, 0, &netmgr, &taskmgr, NULL); atomic_init(&done, false); @@ -736,7 +736,7 @@ manytasks(void **state) { } UNLOCK(&lock); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); isc_mem_destroy(&mctx); isc_condition_destroy(&cv); diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index fcbd929106..4067adf093 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -1271,7 +1271,7 @@ static cfg_clausedef_t options_clauses[] = { { "random-device", &cfg_type_qstringornone, 0 }, { "recursing-file", &cfg_type_qstring, 0 }, { "recursive-clients", &cfg_type_uint32, 0 }, - { "reserved-sockets", &cfg_type_uint32, 0 }, + { "reserved-sockets", &cfg_type_uint32, CFG_CLAUSEFLAG_DEPRECATED }, { "secroots-file", &cfg_type_qstring, 0 }, { "serial-queries", NULL, CFG_CLAUSEFLAG_ANCIENT }, { "serial-query-rate", &cfg_type_uint32, 0 }, diff --git a/lib/ns/include/ns/interfacemgr.h b/lib/ns/include/ns/interfacemgr.h index db16ccd70d..fc9e10aadf 100644 --- a/lib/ns/include/ns/interfacemgr.h +++ b/lib/ns/include/ns/interfacemgr.h @@ -97,10 +97,9 @@ struct ns_interface { isc_result_t ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, isc_timermgr_t *timermgr, - isc_socketmgr_t *socketmgr, isc_nm_t *nm, - dns_dispatchmgr_t *dispatchmgr, isc_task_t *task, - dns_geoip_databases_t *geoip, int ncpus, - ns_interfacemgr_t **mgrp); + isc_nm_t *nm, dns_dispatchmgr_t *dispatchmgr, + isc_task_t *task, dns_geoip_databases_t *geoip, + int ncpus, ns_interfacemgr_t **mgrp); /*%< * Create a new interface manager. 
* diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c index 8e286e23f7..fdb7338b1c 100644 --- a/lib/ns/interfacemgr.c +++ b/lib/ns/interfacemgr.c @@ -66,14 +66,13 @@ struct ns_interfacemgr { unsigned int magic; /*%< Magic number */ isc_refcount_t references; isc_mutex_t lock; - isc_mem_t *mctx; /*%< Memory context */ - ns_server_t *sctx; /*%< Server context */ - isc_taskmgr_t *taskmgr; /*%< Task manager */ - isc_task_t *excl; /*%< Exclusive task */ - isc_timermgr_t *timermgr; /*%< Timer manager */ - isc_socketmgr_t *socketmgr; /*%< Socket manager */ - isc_nm_t *nm; /*%< Net manager */ - int ncpus; /*%< Number of workers */ + isc_mem_t *mctx; /*%< Memory context */ + ns_server_t *sctx; /*%< Server context */ + isc_taskmgr_t *taskmgr; /*%< Task manager */ + isc_task_t *excl; /*%< Exclusive task */ + isc_timermgr_t *timermgr; /*%< Timer manager */ + isc_nm_t *nm; /*%< Net manager */ + int ncpus; /*%< Number of workers */ dns_dispatchmgr_t *dispatchmgr; unsigned int generation; /*%< Current generation no */ ns_listenlist_t *listenon4; @@ -189,10 +188,9 @@ route_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { isc_result_t ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, isc_timermgr_t *timermgr, - isc_socketmgr_t *socketmgr, isc_nm_t *nm, - dns_dispatchmgr_t *dispatchmgr, isc_task_t *task, - dns_geoip_databases_t *geoip, int ncpus, - ns_interfacemgr_t **mgrp) { + isc_nm_t *nm, dns_dispatchmgr_t *dispatchmgr, + isc_task_t *task, dns_geoip_databases_t *geoip, + int ncpus, ns_interfacemgr_t **mgrp) { isc_result_t result; ns_interfacemgr_t *mgr = NULL; @@ -205,7 +203,6 @@ ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, mgr = isc_mem_get(mctx, sizeof(*mgr)); *mgr = (ns_interfacemgr_t){ .taskmgr = taskmgr, .timermgr = timermgr, - .socketmgr = socketmgr, .nm = nm, .dispatchmgr = dispatchmgr, .generation = 1, diff --git a/lib/ns/tests/nstest.c b/lib/ns/tests/nstest.c index a1bbba73f4..9c291e66bc 100644 --- 
a/lib/ns/tests/nstest.c +++ b/lib/ns/tests/nstest.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -57,7 +56,6 @@ isc_nm_t *netmgr = NULL; isc_taskmgr_t *taskmgr = NULL; isc_task_t *maintask = NULL; isc_timermgr_t *timermgr = NULL; -isc_socketmgr_t *socketmgr = NULL; dns_zonemgr_t *zonemgr = NULL; dns_dispatchmgr_t *dispatchmgr = NULL; ns_clientmgr_t *clientmgr = NULL; @@ -202,8 +200,7 @@ cleanup_managers(void) { isc_managers_destroy(netmgr == NULL ? NULL : &netmgr, taskmgr == NULL ? NULL : &taskmgr, - timermgr == NULL ? NULL : &timermgr, - socketmgr == NULL ? NULL : &socketmgr); + timermgr == NULL ? NULL : &timermgr); if (app_running) { isc_app_finish(); @@ -226,8 +223,7 @@ create_managers(void) { isc_event_t *event = NULL; ncpus = isc_os_ncpus(); - isc_managers_create(mctx, ncpus, 0, 0, &netmgr, &taskmgr, &timermgr, - &socketmgr); + isc_managers_create(mctx, ncpus, 0, &netmgr, &taskmgr, &timermgr); CHECK(isc_task_create_bound(taskmgr, 0, &maintask, 0)); isc_taskmgr_setexcltask(taskmgr, maintask); CHECK(isc_task_onshutdown(maintask, shutdown_managers, NULL)); @@ -236,8 +232,8 @@ create_managers(void) { CHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); - CHECK(ns_interfacemgr_create(mctx, sctx, taskmgr, timermgr, socketmgr, - netmgr, dispatchmgr, maintask, NULL, ncpus, + CHECK(ns_interfacemgr_create(mctx, sctx, taskmgr, timermgr, netmgr, + dispatchmgr, maintask, NULL, ncpus, &interfacemgr)); CHECK(ns_listenlist_default(mctx, port, -1, true, &listenon)); diff --git a/lib/ns/tests/nstest.h b/lib/ns/tests/nstest.h index 8b3b2ad516..ac16abd166 100644 --- a/lib/ns/tests/nstest.h +++ b/lib/ns/tests/nstest.h @@ -54,7 +54,6 @@ extern isc_log_t *lctx; extern isc_taskmgr_t *taskmgr; extern isc_task_t *maintask; extern isc_timermgr_t *timermgr; -extern isc_socketmgr_t *socketmgr; extern dns_zonemgr_t *zonemgr; extern dns_dispatchmgr_t *dispatchmgr; extern ns_clientmgr_t *clientmgr; diff --git a/util/copyrights b/util/copyrights 
index 49911e7604..ea2e3051c4 100644 --- a/util/copyrights +++ b/util/copyrights @@ -1691,7 +1691,6 @@ ./lib/isc/include/isc/serial.h C 1999,2000,2001,2004,2005,2006,2007,2009,2016,2018,2019,2020,2021 ./lib/isc/include/isc/siphash.h C 2019,2020,2021 ./lib/isc/include/isc/sockaddr.h C 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2009,2012,2015,2016,2018,2019,2020,2021 -./lib/isc/include/isc/socket.h C 1998,1999,2000,2001,2002,2004,2005,2006,2007,2008,2009,2011,2012,2013,2014,2016,2018,2019,2020,2021 ./lib/isc/include/isc/stat.h C 2004,2007,2014,2016,2018,2019,2020,2021 ./lib/isc/include/isc/stats.h C 2009,2012,2016,2018,2019,2020,2021 ./lib/isc/include/isc/stdatomic.h C 2019,2020,2021 @@ -1761,8 +1760,6 @@ ./lib/isc/serial.c C 1999,2000,2001,2004,2005,2007,2016,2018,2019,2020,2021 ./lib/isc/siphash.c C 2019,2020,2021 ./lib/isc/sockaddr.c C 1999,2000,2001,2002,2003,2004,2005,2006,2007,2010,2011,2012,2014,2015,2016,2017,2018,2019,2020,2021 -./lib/isc/socket.c C 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 -./lib/isc/socket_p.h C 2021 ./lib/isc/stats.c C 2009,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 ./lib/isc/stdio.c C 2000,2001,2004,2007,2011,2012,2013,2014,2016,2018,2019,2020,2021 ./lib/isc/stdtime.c C 1999,2000,2001,2004,2005,2007,2016,2018,2019,2020,2021 @@ -1800,7 +1797,6 @@ ./lib/isc/tests/safe_test.c C 2013,2015,2016,2017,2018,2019,2020,2021 ./lib/isc/tests/siphash_test.c C 2019,2020,2021 ./lib/isc/tests/sockaddr_test.c C 2012,2015,2016,2017,2018,2019,2020,2021 -./lib/isc/tests/socket_test.c C 2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 ./lib/isc/tests/stats_test.c C 2021 ./lib/isc/tests/symtab_test.c C 2011,2012,2013,2016,2018,2019,2020,2021 ./lib/isc/tests/task_test.c C 2011,2012,2016,2017,2018,2019,2020,2021 From ab98e95f4c31772df1daa728441b7107f4554ed3 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Sun, 3 Oct 2021 01:01:40 -0700 Subject: [PATCH 5/6] Don't use 
route socket in unit tests Some of the libns unit tests override the isc_nmhandle_attach() and _detach() functions. This causes a failure in ns_interface_create() if a route socket is being used, so we add a parameter to disable it. --- bin/named/server.c | 2 +- lib/ns/include/ns/interfacemgr.h | 2 +- lib/ns/interfacemgr.c | 14 ++++++++------ lib/ns/tests/nstest.c | 8 +------- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/bin/named/server.c b/bin/named/server.c index e3cd057bb8..f50f4db850 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -9825,7 +9825,7 @@ run_server(isc_task_t *task, isc_event_t *event) { named_g_taskmgr, named_g_timermgr, named_g_netmgr, named_g_dispatchmgr, server->task, geoip, named_g_cpus, - &server->interfacemgr), + true, &server->interfacemgr), "creating interface manager"); CHECKFATAL(isc_timer_create(named_g_timermgr, isc_timertype_inactive, diff --git a/lib/ns/include/ns/interfacemgr.h b/lib/ns/include/ns/interfacemgr.h index fc9e10aadf..c52392156a 100644 --- a/lib/ns/include/ns/interfacemgr.h +++ b/lib/ns/include/ns/interfacemgr.h @@ -99,7 +99,7 @@ ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, isc_timermgr_t *timermgr, isc_nm_t *nm, dns_dispatchmgr_t *dispatchmgr, isc_task_t *task, dns_geoip_databases_t *geoip, - int ncpus, ns_interfacemgr_t **mgrp); + int ncpus, bool scan, ns_interfacemgr_t **mgrp); /*%< * Create a new interface manager. 
* diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c index fdb7338b1c..316d0142f4 100644 --- a/lib/ns/interfacemgr.c +++ b/lib/ns/interfacemgr.c @@ -190,7 +190,7 @@ ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, isc_timermgr_t *timermgr, isc_nm_t *nm, dns_dispatchmgr_t *dispatchmgr, isc_task_t *task, dns_geoip_databases_t *geoip, - int ncpus, ns_interfacemgr_t **mgrp) { + int ncpus, bool scan, ns_interfacemgr_t **mgrp) { isc_result_t result; ns_interfacemgr_t *mgr = NULL; @@ -242,11 +242,13 @@ ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, UNUSED(geoip); #endif /* if defined(HAVE_GEOIP2) */ - result = isc_nm_routeconnect(nm, route_connected, mgr, 0); - if (result != ISC_R_SUCCESS) { - isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_INFO, - "unable to open route socket: %s", - isc_result_totext(result)); + if (scan) { + result = isc_nm_routeconnect(nm, route_connected, mgr, 0); + if (result != ISC_R_SUCCESS) { + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_INFO, + "unable to open route socket: %s", + isc_result_totext(result)); + } } isc_refcount_init(&mgr->references, 1); diff --git a/lib/ns/tests/nstest.c b/lib/ns/tests/nstest.c index 9c291e66bc..15614e1059 100644 --- a/lib/ns/tests/nstest.c +++ b/lib/ns/tests/nstest.c @@ -233,7 +233,7 @@ create_managers(void) { CHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); CHECK(ns_interfacemgr_create(mctx, sctx, taskmgr, timermgr, netmgr, - dispatchmgr, maintask, NULL, ncpus, + dispatchmgr, maintask, NULL, ncpus, false, &interfacemgr)); CHECK(ns_listenlist_default(mctx, port, -1, true, &listenon)); @@ -244,12 +244,6 @@ create_managers(void) { scan_interfaces, NULL, sizeof(isc_event_t)); isc_task_send(maintask, &event); - /* - * There's no straightforward way to determine - * whether the interfaces have been scanned, - * we'll just sleep for a bit and hope. 
- */ - ns_test_nap(500000); clientmgr = ns_interfacemgr_getclientmgr(interfacemgr); atomic_store(&run_managers, true); From 4919c7a227d56edf458c7f061d7767bb05283cc9 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Sun, 3 Oct 2021 01:06:46 -0700 Subject: [PATCH 6/6] CHANGES and release note for [GL #2926] --- CHANGES | 9 +++++++++ doc/notes/notes-current.rst | 3 +++ 2 files changed, 12 insertions(+) diff --git a/CHANGES b/CHANGES index e8cd99b5ce..1a5abf7d2d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,12 @@ +5744. [func] The network manager is now used for netlink sockets + to monitor network interface changes. This was the + last remaining use of the old isc_socket and + isc_socketmgr APIs, so they have now been removed. + The "named -S" argument and the "reserved-sockets" + option in named.conf have no function now, and are + deprecated. "socketmgr" statistics are no longer + reported in the statistics channel. [GL #2926] + 5743. [func] Add finer-grained "update-policy" rules, "krb5-subdomain-self-rhs" and "ms-subdomain-self-rhs", which restrict SRV and PTR record changes, allowing diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index 5e82f9c4b0..98874357fd 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -79,6 +79,9 @@ Feature Changes including ``nsupdate``, ``delv``, ``mdig``, to send all outgoing DNS queries and requests. :gl:`#2401` +- Because the old socket manager API has been removed, "socketmgr" + statistics are no longer reported by the statistics channel. :gl:`#2926` + - Zone transfers over TLS (XoT) now need "dot" Application-Layer Protocol Negotiation (ALPN) tag to be negotiated, as required by the RFC 9103. :gl: `#2794`