diff --git a/.dir-locals.el b/.dir-locals.el index 9178b85847..fb7c4c072a 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -106,6 +106,9 @@ (list "--enable=all" "--suppress=missingIncludeSystem" + "--suppress=nullPointerRedundantCheck" + (concat "--suppressions-list=" (expand-file-name + (concat directory-of-current-dir-locals-file "util/suppressions.txt"))) (concat "-include=" (expand-file-name (concat directory-of-current-dir-locals-file "config.h"))) ) diff --git a/bin/dig/dighost.c b/bin/dig/dighost.c index 99aa077ea7..4346ce5d75 100644 --- a/bin/dig/dighost.c +++ b/bin/dig/dighost.c @@ -3232,7 +3232,10 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { REQUIRE(DIG_VALID_QUERY(query)); REQUIRE(query->handle == NULL); - REQUIRE(!free_now); + INSIST(!free_now); + + debug("tcp_connected(%p, %s, %p)", handle, isc_result_totext(eresult), + query); LOCK_LOOKUP; lookup_attach(query->lookup, &l); @@ -3303,7 +3306,10 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { launch_next_query(query); query_detach(&query); - isc_nmhandle_detach(&handle); + if (l->tls_mode) { + /* FIXME: This is an accounting bug in TLSDNS */ + isc_nmhandle_detach(&handle); + } lookup_detach(&l); UNLOCK_LOOKUP; } diff --git a/bin/named/server.c b/bin/named/server.c index 9761a07ffa..15c3ed62b1 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -8621,8 +8621,7 @@ load_configuration(const char *filename, named_server_t *server, advertised = MAX_TCP_TIMEOUT; } - isc_nm_tcp_settimeouts(named_g_nm, initial, idle, keepalive, - advertised); + isc_nm_settimeouts(named_g_nm, initial, idle, keepalive, advertised); /* * Configure sets of UDP query source ports. 
@@ -15950,8 +15949,8 @@ named_server_tcptimeouts(isc_lex_t *lex, isc_buffer_t **text) { return (ISC_R_UNEXPECTEDEND); } - isc_nm_tcp_gettimeouts(named_g_nm, &initial, &idle, &keepalive, - &advertised); + isc_nm_gettimeouts(named_g_nm, &initial, &idle, &keepalive, + &advertised); /* Look for optional arguments. */ ptr = next_token(lex, NULL); @@ -16000,8 +15999,8 @@ named_server_tcptimeouts(isc_lex_t *lex, isc_buffer_t **text) { result = isc_task_beginexclusive(named_g_server->task); RUNTIME_CHECK(result == ISC_R_SUCCESS); - isc_nm_tcp_settimeouts(named_g_nm, initial, idle, keepalive, - advertised); + isc_nm_settimeouts(named_g_nm, initial, idle, keepalive, + advertised); isc_task_endexclusive(named_g_server->task); } diff --git a/lib/dns/xfrin.c b/lib/dns/xfrin.c index 71a5eab2ef..2adfac46a6 100644 --- a/lib/dns/xfrin.c +++ b/lib/dns/xfrin.c @@ -961,8 +961,6 @@ xfrin_connect_done(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { CHECK(xfrin_send_request(xfr)); failure: - isc_nmhandle_detach(&handle); - if (result != ISC_R_SUCCESS && result != ISC_R_SHUTTINGDOWN) { xfrin_fail(xfr, result, "failed to connect"); } diff --git a/lib/isc/Makefile.am b/lib/isc/Makefile.am index 82eae08351..216189fcfd 100644 --- a/lib/isc/Makefile.am +++ b/lib/isc/Makefile.am @@ -127,6 +127,7 @@ libisc_la_SOURCES = \ netmgr/netmgr.c \ netmgr/tcp.c \ netmgr/tcpdns.c \ + netmgr/tlsdns.c \ netmgr/tls.c \ netmgr/udp.c \ netmgr/uv-compat.c \ diff --git a/lib/isc/include/isc/mutexatomic.h b/lib/isc/include/isc/mutexatomic.h index 1f4cd49cb9..3b6de79fb8 100644 --- a/lib/isc/include/isc/mutexatomic.h +++ b/lib/isc/include/isc/mutexatomic.h @@ -18,6 +18,7 @@ #endif /* HAVE_UCHAR_H */ #include +#include #if !defined(__has_feature) #define __has_feature(x) 0 diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 349fb2f681..7bb21b48e4 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -111,10 +111,22 @@ isc_nmsocket_close(isc_nmsocket_t 
**sockp); * sockets with active handles, the socket will be closed. */ +#ifdef NETMGR_TRACE +#define isc_nmhandle_attach(handle, dest) \ + isc__nmhandle_attach(handle, dest, __FILE__, __LINE__, __func__) +#define isc_nmhandle_detach(handlep) \ + isc__nmhandle_detach(handlep, __FILE__, __LINE__, __func__) +#define FLARG , const char *file, unsigned int line, const char *func +#else +#define isc_nmhandle_attach(handle, dest) isc__nmhandle_attach(handle, dest) +#define isc_nmhandle_detach(handlep) isc__nmhandle_detach(handlep) +#define FLARG +#endif + void -isc_nmhandle_attach(isc_nmhandle_t *handle, isc_nmhandle_t **dest); +isc__nmhandle_attach(isc_nmhandle_t *handle, isc_nmhandle_t **dest FLARG); void -isc_nmhandle_detach(isc_nmhandle_t **handlep); +isc__nmhandle_detach(isc_nmhandle_t **handlep FLARG); /*%< * Increment/decrement the reference counter in a netmgr handle, * but (unlike the attach/detach functions) do not change the pointer @@ -127,6 +139,7 @@ isc_nmhandle_detach(isc_nmhandle_t **handlep); * otherwise know that the handle was in use and might free it, along * with the client.) */ +#undef FLARG void * isc_nmhandle_getdata(isc_nmhandle_t *handle); @@ -302,9 +315,6 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, * If 'quota' is not NULL, then the socket is attached to the specified * quota. This allows us to enforce TCP client quota limits. * - * NOTE: This is currently only called inside isc_nm_listentcpdns(), which - * creates a 'wrapper' socket that sends and receives DNS messages - * prepended with a two-byte length field, and handles buffering. 
*/ isc_result_t @@ -326,10 +336,11 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, */ isc_result_t -isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, - void *cbarg, isc_nm_accept_cb_t accept_cb, - void *accept_cbarg, size_t extrahandlesize, int backlog, - isc_quota_t *quota, isc_nmsocket_t **sockp); +isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, + isc_nm_recv_cb_t recv_cb, void *recv_cbarg, + isc_nm_accept_cb_t accept_cb, void *accept_cbarg, + size_t extrahandlesize, int backlog, isc_quota_t *quota, + isc_nmsocket_t **sockp); /*%< * Start listening for DNS messages over the TCP interface 'iface', using * net manager 'mgr'. @@ -391,8 +402,35 @@ isc_nm_tcpdns_keepalive(isc_nmhandle_t *handle, bool value); */ void -isc_nm_tcp_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, - uint32_t keepalive, uint32_t advertised); +isc_nm_tlsdns_sequential(isc_nmhandle_t *handle); +/*%< + * Disable pipelining on this connection. Each DNS packet will be only + * processed after the previous completes. + * + * The socket must be unpaused after the query is processed. This is done + * when the response is sent, or if we're dropping the query, it will be done + * when a handle is fully dereferenced by calling the socket's + * closehandle_cb callback. + * + * Note: This can only be run while a message is being processed; if it is + * run before any messages are read, no messages will be read. + * + * Also note: once this has been set, it cannot be reversed for a given + * connection. + */ + +void +isc_nm_tlsdns_keepalive(isc_nmhandle_t *handle, bool value); +/*%< + * Enable/disable keepalive on this connection by setting it to 'value'. + * + * When keepalive is active, we switch to using the keepalive timeout + * to determine when to close a connection, rather than the idle timeout. 
+ */ + +void +isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, + uint32_t keepalive, uint32_t advertised); /*%< * Sets the initial, idle, and keepalive timeout values to use for * TCP connections, and the timeout value to advertise in responses using @@ -404,8 +442,8 @@ isc_nm_tcp_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, */ void -isc_nm_tcp_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, - uint32_t *keepalive, uint32_t *advertised); +isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, + uint32_t *keepalive, uint32_t *advertised); /*%< * Gets the initial, idle, keepalive, or advertised timeout values, * in tenths of seconds. diff --git a/lib/isc/include/isc/quota.h b/lib/isc/include/isc/quota.h index 98d349e45a..3431006595 100644 --- a/lib/isc/include/isc/quota.h +++ b/lib/isc/include/isc/quota.h @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -44,6 +45,7 @@ ISC_LANG_BEGINDECLS typedef struct isc_quota_cb isc_quota_cb_t; typedef void (*isc_quota_cb_func_t)(isc_quota_t *quota, void *data); struct isc_quota_cb { + int magic; isc_quota_cb_func_t cb_func; void * data; ISC_LINK(isc_quota_cb_t) link; @@ -51,6 +53,7 @@ struct isc_quota_cb { /*% isc_quota structure */ struct isc_quota { + int magic; atomic_uint_fast32_t max; atomic_uint_fast32_t used; atomic_uint_fast32_t soft; diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 4b56b1b772..df00fa1741 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -64,6 +64,76 @@ void isc__nm_dump_active(isc_nm_t *nm); +#if defined(__linux__) +#include +#define gettid() (uint32_t) syscall(SYS_gettid) +#elif defined(_WIN32) +#define gettid() (uint32_t) GetCurrentThreadId() +#else +#define gettid() (uint32_t) pthread_self() +#endif + +#ifdef NETMGR_TRACE_VERBOSE +#define NETMGR_TRACE_LOG(format, ...) 
\ + fprintf(stderr, "%" PRIu32 ":%d:%s:%u:%s:" format, gettid(), \ + isc_nm_tid(), file, line, func, __VA_ARGS__) +#else +#define NETMGR_TRACE_LOG(format, ...) \ + (void)file; \ + (void)line; \ + (void)func; +#endif + +#define FLARG_PASS , file, line, func +#define FLARG \ + , const char *file __attribute__((unused)), \ + unsigned int line __attribute__((unused)), \ + const char *func __attribute__((unused)) +#define FLARG_IEVENT(ievent) \ + const char *file = ievent->file; \ + unsigned int line = ievent->line; \ + const char *func = ievent->func; +#define FLARG_IEVENT_PASS(ievent) \ + ievent->file = file; \ + ievent->line = line; \ + ievent->func = func; +#define isc__nm_uvreq_get(req, sock) \ + isc___nm_uvreq_get(req, sock, __FILE__, __LINE__, __func__) +#define isc__nm_uvreq_put(req, sock) \ + isc___nm_uvreq_put(req, sock, __FILE__, __LINE__, __func__) +#define isc__nmsocket_init(sock, mgr, type, iface) \ + isc___nmsocket_init(sock, mgr, type, iface, __FILE__, __LINE__, \ + __func__) +#define isc__nmsocket_put(sockp) \ + isc___nmsocket_put(sockp, __FILE__, __LINE__, __func__) +#define isc__nmsocket_attach(sock, target) \ + isc___nmsocket_attach(sock, target, __FILE__, __LINE__, __func__) +#define isc__nmsocket_detach(socketp) \ + isc___nmsocket_detach(socketp, __FILE__, __LINE__, __func__) +#define isc__nmsocket_close(socketp) \ + isc___nmsocket_close(socketp, __FILE__, __LINE__, __func__) +#define isc__nmhandle_get(sock, peer, local) \ + isc___nmhandle_get(sock, peer, local, __FILE__, __LINE__, __func__) +#define isc__nmsocket_prep_destroy(sock) \ + isc___nmsocket_prep_destroy(sock, __FILE__, __LINE__, __func__) +#else +#define NETMGR_TRACE_LOG(format, ...) 
+ +#define FLARG_PASS +#define FLARG +#define FLARG_IEVENT(ievent) +#define FLARG_IEVENT_PASS(ievent) +#define isc__nm_uvreq_get(req, sock) isc___nm_uvreq_get(req, sock) +#define isc__nm_uvreq_put(req, sock) isc___nm_uvreq_put(req, sock) +#define isc__nmsocket_init(sock, mgr, type, iface) \ + isc___nmsocket_init(sock, mgr, type, iface) +#define isc__nmsocket_put(sockp) isc___nmsocket_put(sockp) +#define isc__nmsocket_attach(sock, target) isc___nmsocket_attach(sock, target) +#define isc__nmsocket_detach(socketp) isc___nmsocket_detach(socketp) +#define isc__nmsocket_close(socketp) isc___nmsocket_close(socketp) +#define isc__nmhandle_get(sock, peer, local) \ + isc___nmhandle_get(sock, peer, local) +#define isc__nmsocket_prep_destroy(sock) isc___nmsocket_prep_destroy(sock) #endif /* @@ -149,12 +219,13 @@ typedef enum isc__netievent_type { netievent_tcpsend, netievent_tcpstartread, netievent_tcppauseread, - netievent_tcpchildaccept, netievent_tcpaccept, netievent_tcpstop, netievent_tcpcancel, netievent_tcpclose, + netievent_tcpdnsaccept, + netievent_tcpdnsconnect, netievent_tcpdnssend, netievent_tcpdnsread, netievent_tcpdnscancel, @@ -167,13 +238,20 @@ typedef enum isc__netievent_type { netievent_tlsconnect, netievent_tlsdobio, - netievent_closecb, + netievent_tlsdnsaccept, + netievent_tlsdnsconnect, + netievent_tlsdnssend, + netievent_tlsdnsread, + netievent_tlsdnscancel, + netievent_tlsdnsclose, + netievent_tlsdnsstop, + + netievent_close, netievent_shutdown, netievent_stop, netievent_pause, netievent_connectcb, - netievent_acceptcb, netievent_readcb, netievent_sendcb, @@ -184,6 +262,7 @@ typedef enum isc__netievent_type { */ netievent_udplisten, netievent_tcplisten, + netievent_tcpdnslisten, netievent_resume, netievent_detach, } isc__netievent_type; @@ -231,40 +310,107 @@ struct isc__nm_uvreq { ISC_LINK(isc__nm_uvreq_t) link; }; +void * +isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type); +/*%< + * Allocate an ievent and set the type. 
+ */ +void +isc__nm_put_netievent(isc_nm_t *mgr, void *ievent); + +/* + * The macros here are used to simulate the "inheritance" in C, there's the base + * netievent structure that contains just its own type and socket, and there are + * extended netievent types that also have handles or requests or other data. + * + * The macros here ensure that: + * + * 1. every netievent type has matching definition, declaration and + * implementation + * + * 2. we handle all the netievent types of same subclass the same, e.g. if the + * extended netievent contains handle, we always attach to the handle in + * the ctor and detach from the handle in dtor. + * + * There are three macros here for each netievent subclass: + * + * 1. NETIEVENT_*_TYPE(type) creates the typedef for each type; used below in + * this header + * + * 2. NETIEVENT_*_DECL(type) generates the declaration of the get and put + * functions (isc__nm_get_netievent_* and isc__nm_put_netievent_*); used + * below in this header + * + * 3. NETIEVENT_*_DEF(type) generates the definition of the functions; used + * either in netmgr.c or matching protocol file (e.g. udp.c, tcp.c, etc.) 
+ */ + +#define NETIEVENT__SOCKET \ + isc__netievent_type type; \ + isc_nmsocket_t *sock; \ + const char *file; \ + unsigned int line; \ + const char *func + typedef struct isc__netievent__socket { - isc__netievent_type type; - isc_nmsocket_t *sock; + NETIEVENT__SOCKET; } isc__netievent__socket_t; -typedef isc__netievent__socket_t isc__netievent_udplisten_t; -typedef isc__netievent__socket_t isc__netievent_udpread_t; -typedef isc__netievent__socket_t isc__netievent_udpstop_t; -typedef isc__netievent__socket_t isc__netievent_udpclose_t; -typedef isc__netievent__socket_t isc__netievent_tcpstop_t; +#define NETIEVENT_SOCKET_TYPE(type) \ + typedef isc__netievent__socket_t isc__netievent_##type##_t; -typedef isc__netievent__socket_t isc__netievent_tcpclose_t; -typedef isc__netievent__socket_t isc__netievent_startread_t; -typedef isc__netievent__socket_t isc__netievent_pauseread_t; -typedef isc__netievent__socket_t isc__netievent_closecb_t; +#define NETIEVENT_SOCKET_DECL(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock); \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent); -typedef isc__netievent__socket_t isc__netievent_tcpdnsclose_t; -typedef isc__netievent__socket_t isc__netievent_tcpdnsread_t; -typedef isc__netievent__socket_t isc__netievent_tcpdnsstop_t; - -typedef isc__netievent__socket_t isc__netievent_tlsclose_t; -typedef isc__netievent__socket_t isc__netievent_tlsdobio_t; +#define NETIEVENT_SOCKET_DEF(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock) { \ + isc__netievent_##type##_t *ievent = \ + isc__nm_get_netievent(nm, netievent_##type); \ + isc__nmsocket_attach(sock, &ievent->sock); \ + \ + return (ievent); \ + } \ + \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent) { \ + isc__nmsocket_detach(&ievent->sock); \ + isc__nm_put_netievent(nm, ievent); \ + } typedef struct 
isc__netievent__socket_req { - isc__netievent_type type; - isc_nmsocket_t *sock; + NETIEVENT__SOCKET; isc__nm_uvreq_t *req; } isc__netievent__socket_req_t; -typedef isc__netievent__socket_req_t isc__netievent_udpconnect_t; -typedef isc__netievent__socket_req_t isc__netievent_tcpconnect_t; -typedef isc__netievent__socket_req_t isc__netievent_tcplisten_t; -typedef isc__netievent__socket_req_t isc__netievent_tcpsend_t; -typedef isc__netievent__socket_req_t isc__netievent_tcpdnssend_t; +#define NETIEVENT_SOCKET_REQ_TYPE(type) \ + typedef isc__netievent__socket_req_t isc__netievent_##type##_t; + +#define NETIEVENT_SOCKET_REQ_DECL(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req); \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent); + +#define NETIEVENT_SOCKET_REQ_DEF(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { \ + isc__netievent_##type##_t *ievent = \ + isc__nm_get_netievent(nm, netievent_##type); \ + isc__nmsocket_attach(sock, &ievent->sock); \ + ievent->req = req; \ + \ + return (ievent); \ + } \ + \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent) { \ + isc__nmsocket_detach(&ievent->sock); \ + isc__nm_put_netievent(nm, ievent); \ + } typedef struct isc__netievent__socket_req_result { isc__netievent_type type; @@ -273,43 +419,100 @@ typedef struct isc__netievent__socket_req_result { isc_result_t result; } isc__netievent__socket_req_result_t; -typedef isc__netievent__socket_req_result_t isc__netievent_connectcb_t; -typedef isc__netievent__socket_req_result_t isc__netievent_acceptcb_t; -typedef isc__netievent__socket_req_result_t isc__netievent_readcb_t; -typedef isc__netievent__socket_req_result_t isc__netievent_sendcb_t; +#define NETIEVENT_SOCKET_REQ_RESULT_TYPE(type) \ + typedef isc__netievent__socket_req_result_t 
isc__netievent_##type##_t; -typedef struct isc__netievent__socket_streaminfo_quota { - isc__netievent_type type; - isc_nmsocket_t *sock; - isc_uv_stream_info_t streaminfo; - isc_quota_t *quota; -} isc__netievent__socket_streaminfo_quota_t; +#define NETIEVENT_SOCKET_REQ_RESULT_DECL(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \ + isc_result_t result); \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent); -typedef isc__netievent__socket_streaminfo_quota_t - isc__netievent_tcpchildaccept_t; +#define NETIEVENT_SOCKET_REQ_RESULT_DEF(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \ + isc_result_t result) { \ + isc__netievent_##type##_t *ievent = \ + isc__nm_get_netievent(nm, netievent_##type); \ + isc__nmsocket_attach(sock, &ievent->sock); \ + ievent->req = req; \ + ievent->result = result; \ + \ + return (ievent); \ + } \ + \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent) { \ + isc__nmsocket_detach(&ievent->sock); \ + isc__nm_put_netievent(nm, ievent); \ + } typedef struct isc__netievent__socket_handle { - isc__netievent_type type; - isc_nmsocket_t *sock; + NETIEVENT__SOCKET; isc_nmhandle_t *handle; } isc__netievent__socket_handle_t; -typedef isc__netievent__socket_handle_t isc__netievent_udpcancel_t; -typedef isc__netievent__socket_handle_t isc__netievent_tcpcancel_t; -typedef isc__netievent__socket_handle_t isc__netievent_tcpdnscancel_t; -typedef isc__netievent__socket_handle_t isc__netievent_detach_t; +#define NETIEVENT_SOCKET_HANDLE_TYPE(type) \ + typedef isc__netievent__socket_handle_t isc__netievent_##type##_t; + +#define NETIEVENT_SOCKET_HANDLE_DECL(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle); \ + void 
isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent); + +#define NETIEVENT_SOCKET_HANDLE_DEF(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle) { \ + isc__netievent_##type##_t *ievent = \ + isc__nm_get_netievent(nm, netievent_##type); \ + isc__nmsocket_attach(sock, &ievent->sock); \ + isc_nmhandle_attach(handle, &ievent->handle); \ + \ + return (ievent); \ + } \ + \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent) { \ + isc__nmsocket_detach(&ievent->sock); \ + isc_nmhandle_detach(&ievent->handle); \ + isc__nm_put_netievent(nm, ievent); \ + } typedef struct isc__netievent__socket_quota { - isc__netievent_type type; - isc_nmsocket_t *sock; + NETIEVENT__SOCKET; isc_quota_t *quota; } isc__netievent__socket_quota_t; -typedef isc__netievent__socket_quota_t isc__netievent_tcpaccept_t; +#define NETIEVENT_SOCKET_QUOTA_TYPE(type) \ + typedef isc__netievent__socket_quota_t isc__netievent_##type##_t; + +#define NETIEVENT_SOCKET_QUOTA_DECL(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota); \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent); + +#define NETIEVENT_SOCKET_QUOTA_DEF(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota) { \ + isc__netievent_##type##_t *ievent = \ + isc__nm_get_netievent(nm, netievent_##type); \ + isc__nmsocket_attach(sock, &ievent->sock); \ + ievent->quota = quota; \ + \ + return (ievent); \ + } \ + \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent) { \ + isc__nmsocket_detach(&ievent->sock); \ + isc__nm_put_netievent(nm, ievent); \ + } typedef struct isc__netievent_udpsend { - isc__netievent_type type; - isc_nmsocket_t *sock; + NETIEVENT__SOCKET; isc_sockaddr_t peer; 
isc__nm_uvreq_t *req; } isc__netievent_udpsend_t; @@ -326,8 +529,26 @@ typedef struct isc__netievent { isc__netievent_type type; } isc__netievent_t; -typedef isc__netievent_t isc__netievent_shutdown_t; -typedef isc__netievent_t isc__netievent_stop_t; +#define NETIEVENT_TYPE(type) typedef isc__netievent_t isc__netievent_##type##_t; + +#define NETIEVENT_DECL(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type(isc_nm_t *nm); \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent); + +#define NETIEVENT_DEF(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm) { \ + isc__netievent_##type##_t *ievent = \ + isc__nm_get_netievent(nm, netievent_##type); \ + \ + return (ievent); \ + } \ + \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent) { \ + isc__nm_put_netievent(nm, ievent); \ + } typedef union { isc__netievent_t ni; @@ -335,7 +556,6 @@ typedef union { isc__netievent__socket_req_t nisr; isc__netievent_udpsend_t nius; isc__netievent__socket_quota_t nisq; - isc__netievent__socket_streaminfo_quota_t nissq; isc__netievent_tlsconnect_t nitc; } isc__netievent_storage_t; @@ -405,7 +625,9 @@ typedef enum isc_nmsocket_type { isc_nm_tcpdnslistener, isc_nm_tcpdnssocket, isc_nm_tlslistener, - isc_nm_tlssocket + isc_nm_tlssocket, + isc_nm_tlsdnslistener, + isc_nm_tlsdnssocket } isc_nmsocket_type; /*% @@ -440,7 +662,7 @@ struct isc_nmsocket { isc_nmsocket_t *parent; /*% Listener socket this connection was accepted on */ isc_nmsocket_t *listener; - /*% Self, for self-contained unreferenced sockets (tcpdns) */ + /*% Self socket */ isc_nmsocket_t *self; /*% TLS stuff */ @@ -513,7 +735,7 @@ struct isc_nmsocket { /* Atomic */ /*% Number of running (e.g. listening) child sockets */ - atomic_int_fast32_t rchildren; + uint_fast32_t rchildren; /*% * Socket is active if it's listening, working, etc. 
If it's @@ -532,11 +754,10 @@ struct isc_nmsocket { atomic_bool closing; atomic_bool closed; atomic_bool listening; - atomic_bool listen_error; atomic_bool connecting; atomic_bool connected; - atomic_bool connect_error; bool accepting; + bool reading; isc_refcount_t references; /*% @@ -550,17 +771,10 @@ struct isc_nmsocket { atomic_bool sequential; /*% - * TCPDNS socket has exceeded the maximum number of - * simultaneous requests per connection, so will be temporarily - * restricted from pipelining. + * The socket is processing read callback, this is a guard to not read + * data before the readcb is back. */ - atomic_bool overlimit; - - /*% - * TCPDNS socket in sequential mode is currently processing a packet, - * we need to wait until it finishes. - */ - atomic_bool processing; + bool processing; /*% * A TCP socket has had isc_nm_pauseread() called. @@ -584,14 +798,41 @@ struct isc_nmsocket { * Used to wait for TCP listening events to complete, and * for the number of running children to reach zero during * shutdown. + * + * We use two condition variables to prevent the race where the netmgr + * threads would be able to finish and destroy the socket before it's + * unlocked by the isc_nm_listen() function. So, the flow is as + * follows: + * + * 1. parent thread creates all children sockets and passes them to + * netthreads, looks at the signaling variable and WAIT(cond) until + * the children are done initializing + * + * 2. the events get picked by netthreads, calls the libuv API (and + * either succeeds or fails) and WAIT(scond) until all other + * children sockets in netthreads are initialized and the listening + * socket lock is unlocked + * + * 3. 
the control is given back to the parent thread which now either + * returns success or shuts down the listener if an error has + * occurred in the children netthread + * + * NOTE: The other approach would be doing an extra attach to the parent + * listening socket, and then detach it in the parent thread, but that + * breaks the promise that once the libuv socket is initialized on the + * nmsocket, the nmsocket needs to be handled only by matching + * netthread, so in fact that would add a complexity in a way that + * isc__nmsocket_detach would have to be converted to use an + * asynchronous netievent. */ isc_mutex_t lock; isc_condition_t cond; + isc_condition_t scond; /*% * Used to pass a result back from listen or connect events. */ - atomic_int_fast32_t result; + isc_result_t result; /*% * List of active handles. @@ -631,14 +872,18 @@ struct isc_nmsocket { */ isc_nm_opaquecb_t closehandle_cb; + isc_nmhandle_t *recv_handle; isc_nm_recv_cb_t recv_cb; void *recv_cbarg; + bool recv_read; isc_nm_cb_t connect_cb; void *connect_cbarg; isc_nm_accept_cb_t accept_cb; void *accept_cbarg; + + atomic_int_fast32_t active_child_connections; #ifdef NETMGR_TRACE void *backtrace[TRACE_SIZE]; int backtrace_size; @@ -653,13 +898,12 @@ isc__nm_in_netthread(void); * Returns 'true' if we're in the network thread. */ -void * -isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type); -/*%< - * Allocate an ievent and set the type. - */ void -isc__nm_put_ievent(isc_nm_t *mgr, void *ievent); +isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event); +/*%< + * If the caller is already in the matching nmthread, process the netievent + * directly, if not enqueue using isc__nm_enqueue_ievent(). 
+ */ void isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event); @@ -679,8 +923,8 @@ isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf); */ isc_nmhandle_t * -isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, - isc_sockaddr_t *local); +isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, + isc_sockaddr_t *local FLARG); /*%< * Get a handle for the socket 'sock', allocating a new one * if there isn't one available in 'sock->inactivehandles'. @@ -696,14 +940,14 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, */ isc__nm_uvreq_t * -isc__nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock); +isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG); /*%< * Get a UV request structure for the socket 'sock', allocating a * new one if there isn't one available in 'sock->inactivereqs'. */ void -isc__nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock); +isc___nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock FLARG); /*%< * Completes the use of a UV request structure, setting '*req' to NULL. * @@ -712,28 +956,28 @@ isc__nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock); */ void -isc__nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, - isc_nmiface_t *iface); +isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, + isc_nmiface_t *iface FLARG); /*%< * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type' * and its interface to 'iface'. */ void -isc__nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target); +isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG); /*%< * Attach to a socket, increasing refcount */ void -isc__nmsocket_detach(isc_nmsocket_t **socketp); +isc___nmsocket_detach(isc_nmsocket_t **socketp FLARG); /*%< * Detach from socket, decreasing refcount and possibly destroying the * socket if it's no longer referenced. 
*/ void -isc__nmsocket_prep_destroy(isc_nmsocket_t *sock); +isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG); /*%< * Market 'sock' as inactive, close it if necessary, and destroy it * if there are no remaining references or active handles. @@ -771,17 +1015,14 @@ void isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0); /*%< * Issue a connect callback on the socket, used to call the callback - * on failed conditions when the event can't be scheduled on the uv loop. + */ -void +isc_result_t isc__nm_acceptcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq, isc_result_t eresult); -void -isc__nm_async_acceptcb(isc__networker_t *worker, isc__netievent_t *ev0); /*%< - * Issue a accept callback on the socket, used to call the callback - * on failed conditions when the event can't be scheduled on the uv loop. + * Issue a synchronous accept callback on the socket. */ void @@ -806,12 +1047,6 @@ isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0); * on failed conditions when the event can't be scheduled on the uv loop. */ -void -isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ev0); -/*%< - * Issue a 'handle closed' callback on the socket. - */ - void isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0); /*%< @@ -900,13 +1135,13 @@ isc__nm_tcp_close(isc_nmsocket_t *sock); * Close a TCP socket. */ void -isc__nm_tcp_pauseread(isc_nmsocket_t *sock); +isc__nm_tcp_pauseread(isc_nmhandle_t *handle); /*%< - * Pause reading on this socket, while still remembering the callback. + * Pause reading on this handle, while still remembering the callback. */ void -isc__nm_tcp_resumeread(isc_nmsocket_t *sock); +isc__nm_tcp_resumeread(isc_nmhandle_t *handle); /*%< * Resume reading from socket. * @@ -931,6 +1166,12 @@ isc__nm_tcp_stoplistening(isc_nmsocket_t *sock); * Stop listening on 'sock'. 
*/ +int_fast32_t +isc__nm_tcp_listener_nactive(isc_nmsocket_t *sock); +/*%< + * Returns the number of active connections for the TCP listener socket. + */ + void isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout); /*%< @@ -944,8 +1185,6 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0); void isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0); void -isc__nm_async_tcpchildaccept(isc__networker_t *worker, isc__netievent_t *ev0); -void isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0); void isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0); @@ -954,9 +1193,9 @@ isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ev0); void isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ev0); void -isc__nm_async_tcp_startread(isc__networker_t *worker, isc__netievent_t *ev0); +isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0); void -isc__nm_async_tcp_pauseread(isc__networker_t *worker, isc__netievent_t *ev0); +isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0); void isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0); void @@ -976,15 +1215,21 @@ void isc__nm_async_tlsconnect(isc__networker_t *worker, isc__netievent_t *ev0); void -isc__nm_async_tls_startread(isc__networker_t *worker, isc__netievent_t *ev0); +isc__nm_async_tlsstartread(isc__networker_t *worker, isc__netievent_t *ev0); void -isc__nm_async_tls_do_bio(isc__networker_t *worker, isc__netievent_t *ev0); +isc__nm_async_tlsdobio(isc__networker_t *worker, isc__netievent_t *ev0); /*%< * Callback handlers for asynchronouse TLS events. 
*/ +void +isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0); +void +isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0); +void +isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0); void isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, void *cbarg); @@ -992,6 +1237,9 @@ isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, * Back-end implementation of isc_nm_send() for TCPDNS handles. */ +void +isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock); + void isc__nm_tcpdns_close(isc_nmsocket_t *sock); /*%< @@ -1011,6 +1259,10 @@ isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout); * associated with 'handle', and the TCP socket it wraps around. */ +void +isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0); +void +isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0); void isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0); void @@ -1032,6 +1284,56 @@ isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle); * Stop reading on a connected TCPDNS handle. */ +void +isc__nm_tlsdns_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg); +/*%< + * Back-end implementation of isc_nm_send() for TLSDNS handles. + */ + +void +isc__nm_tlsdns_shutdown(isc_nmsocket_t *sock); + +void +isc__nm_tlsdns_close(isc_nmsocket_t *sock); +/*%< + * Close a TLSDNS socket. + */ + +void +isc__nm_tlsdns_stoplistening(isc_nmsocket_t *sock); +/*%< + * Stop listening on 'sock'. + */ + +void +isc__nm_tlsdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout); +/*%< + * Set the read timeout and reset the timer for the TLSDNS socket + * associated with 'handle', and the TCP socket it wraps around. 
+ */ + +void +isc__nm_async_tlsdnscancel(isc__networker_t *worker, isc__netievent_t *ev0); +void +isc__nm_async_tlsdnsclose(isc__networker_t *worker, isc__netievent_t *ev0); +void +isc__nm_async_tlsdnssend(isc__networker_t *worker, isc__netievent_t *ev0); +void +isc__nm_async_tlsdnsstop(isc__networker_t *worker, isc__netievent_t *ev0); + +void +isc__nm_async_tlsdnsread(isc__networker_t *worker, isc__netievent_t *ev0); + +void +isc__nm_tlsdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg); + +void +isc__nm_tlsdns_cancelread(isc_nmhandle_t *handle); +/*%< + * Stop reading on a connected TLSDNS handle. + */ + void isc__nm_tls_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, void *cbarg); @@ -1046,15 +1348,15 @@ isc__nm_tls_close(isc_nmsocket_t *sock); */ void -isc__nm_tls_pauseread(isc_nmsocket_t *sock); +isc__nm_tls_pauseread(isc_nmhandle_t *handle); /*%< - * Pause reading on this socket, while still remembering the callback. + * Pause reading on this handle, while still remembering the callback. */ void -isc__nm_tls_resumeread(isc_nmsocket_t *sock); +isc__nm_tls_resumeread(isc_nmhandle_t *handle); /*%< - * Resume reading from socket. + * Resume reading from the handle. * */ @@ -1062,10 +1364,10 @@ void isc__nm_tls_stoplistening(isc_nmsocket_t *sock); #define isc__nm_uverr2result(x) \ - isc___nm_uverr2result(x, true, __FILE__, __LINE__) + isc___nm_uverr2result(x, true, __FILE__, __LINE__, __func__) isc_result_t isc___nm_uverr2result(int uverr, bool dolog, const char *file, - unsigned int line); + unsigned int line, const char *func); /*%< * Convert a libuv error value into an isc_result_t. 
The * list of supported error values is not complete; new users @@ -1109,6 +1411,12 @@ isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp); * Platform independent socket() version */ +void +isc__nm_closesocket(uv_os_sock_t sock); +/*%< + * Platform independent closesocket() version + */ + isc_result_t isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family); /*%< @@ -1139,8 +1447,124 @@ isc__nm_socket_dontfrag(uv_os_sock_t fd, sa_family_t sa_family); * Set the SO_IP_DONTFRAG (or equivalent) socket option of the fd if available */ +isc_result_t +isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms); +/*%< + * Set the connection timeout in milliseconds, on non-Linux platforms, + * the minimum value must be at least 1000 (1 second). + */ + void isc__nm_tls_initialize(void); /*%< * Initialize OpenSSL library, idempotent. */ + +/* + * typedef all the netievent types + */ + +NETIEVENT_SOCKET_TYPE(close); +NETIEVENT_SOCKET_TYPE(tcpclose); +NETIEVENT_SOCKET_TYPE(tcplisten); +NETIEVENT_SOCKET_TYPE(tcppauseread); +NETIEVENT_SOCKET_TYPE(tcpstop); +NETIEVENT_SOCKET_TYPE(tlsclose); +/* NETIEVENT_SOCKET_TYPE(tlsconnect); */ /* unique type, defined independently + */ +NETIEVENT_SOCKET_TYPE(tlsdobio); +NETIEVENT_SOCKET_TYPE(tlsstartread); +NETIEVENT_SOCKET_TYPE(udpclose); +NETIEVENT_SOCKET_TYPE(udplisten); +NETIEVENT_SOCKET_TYPE(udpread); +/* NETIEVENT_SOCKET_TYPE(udpsend); */ /* unique type, defined independently */ +NETIEVENT_SOCKET_TYPE(udpstop); + +NETIEVENT_SOCKET_TYPE(tcpdnsclose); +NETIEVENT_SOCKET_TYPE(tcpdnsread); +NETIEVENT_SOCKET_TYPE(tcpdnsstop); +NETIEVENT_SOCKET_TYPE(tcpdnslisten); +NETIEVENT_SOCKET_REQ_TYPE(tcpdnsconnect); +NETIEVENT_SOCKET_REQ_TYPE(tcpdnssend); +NETIEVENT_SOCKET_HANDLE_TYPE(tcpdnscancel); +NETIEVENT_SOCKET_QUOTA_TYPE(tcpdnsaccept); + +NETIEVENT_SOCKET_TYPE(tlsdnsclose); +NETIEVENT_SOCKET_TYPE(tlsdnsread); +NETIEVENT_SOCKET_TYPE(tlsdnsstop); +NETIEVENT_SOCKET_REQ_TYPE(tlsdnssend); 
+NETIEVENT_SOCKET_HANDLE_TYPE(tlsdnscancel); + +NETIEVENT_SOCKET_REQ_TYPE(tcpconnect); +NETIEVENT_SOCKET_REQ_TYPE(tcpsend); +NETIEVENT_SOCKET_TYPE(tcpstartread); +NETIEVENT_SOCKET_REQ_TYPE(tlssend); +NETIEVENT_SOCKET_REQ_TYPE(udpconnect); + +NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb); +NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb); +NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb); + +NETIEVENT_SOCKET_HANDLE_TYPE(detach); +NETIEVENT_SOCKET_HANDLE_TYPE(tcpcancel); +NETIEVENT_SOCKET_HANDLE_TYPE(udpcancel); + +NETIEVENT_SOCKET_QUOTA_TYPE(tcpaccept); + +NETIEVENT_TYPE(pause); +NETIEVENT_TYPE(resume); +NETIEVENT_TYPE(shutdown); +NETIEVENT_TYPE(stop); + +/* Now declare the helper functions */ + +NETIEVENT_SOCKET_DECL(close); +NETIEVENT_SOCKET_DECL(tcpclose); +NETIEVENT_SOCKET_DECL(tcplisten); +NETIEVENT_SOCKET_DECL(tcppauseread); +NETIEVENT_SOCKET_DECL(tcpstartread); +NETIEVENT_SOCKET_DECL(tcpstop); +NETIEVENT_SOCKET_DECL(tlsclose); +NETIEVENT_SOCKET_DECL(tlsconnect); +NETIEVENT_SOCKET_DECL(tlsdobio); +NETIEVENT_SOCKET_DECL(tlsstartread); +NETIEVENT_SOCKET_DECL(udpclose); +NETIEVENT_SOCKET_DECL(udplisten); +NETIEVENT_SOCKET_DECL(udpread); +NETIEVENT_SOCKET_DECL(udpsend); +NETIEVENT_SOCKET_DECL(udpstop); + +NETIEVENT_SOCKET_DECL(tcpdnsclose); +NETIEVENT_SOCKET_DECL(tcpdnsread); +NETIEVENT_SOCKET_DECL(tcpdnsstop); +NETIEVENT_SOCKET_DECL(tcpdnslisten); +NETIEVENT_SOCKET_REQ_DECL(tcpdnsconnect); +NETIEVENT_SOCKET_REQ_DECL(tcpdnssend); +NETIEVENT_SOCKET_HANDLE_DECL(tcpdnscancel); +NETIEVENT_SOCKET_QUOTA_DECL(tcpdnsaccept); + +NETIEVENT_SOCKET_DECL(tlsdnsclose); +NETIEVENT_SOCKET_DECL(tlsdnsread); +NETIEVENT_SOCKET_DECL(tlsdnsstop); +NETIEVENT_SOCKET_REQ_DECL(tlsdnssend); +NETIEVENT_SOCKET_HANDLE_DECL(tlsdnscancel); + +NETIEVENT_SOCKET_REQ_DECL(tcpconnect); +NETIEVENT_SOCKET_REQ_DECL(tcpsend); +NETIEVENT_SOCKET_REQ_DECL(tlssend); +NETIEVENT_SOCKET_REQ_DECL(udpconnect); + +NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb); +NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb); 
+NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb); + +NETIEVENT_SOCKET_HANDLE_DECL(udpcancel); +NETIEVENT_SOCKET_HANDLE_DECL(tcpcancel); +NETIEVENT_SOCKET_DECL(detach); + +NETIEVENT_SOCKET_QUOTA_DECL(tcpaccept); + +NETIEVENT_DECL(pause); +NETIEVENT_DECL(resume); +NETIEVENT_DECL(shutdown); +NETIEVENT_DECL(stop); diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 632b636733..8ac57b61f3 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -37,6 +37,7 @@ #ifdef NETMGR_TRACE #include + #endif /*% @@ -125,7 +126,7 @@ static const isc_statscounter_t unixstatsindex[] = { static thread_local int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN; static void -nmsocket_maybe_destroy(isc_nmsocket_t *sock); +nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG); static void nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle); static isc_threadresult_t @@ -142,16 +143,21 @@ static void process_queues(isc__networker_t *worker); static void -isc__nm_async_stopcb(isc__networker_t *worker, isc__netievent_t *ev0); +isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0); static void -isc__nm_async_pausecb(isc__networker_t *worker, isc__netievent_t *ev0); +isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0); static void -isc__nm_async_resumecb(isc__networker_t *worker, isc__netievent_t *ev0); +isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0); static void isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0); +static void +isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0); +/*%< + * Issue a 'handle closed' callback on the socket. 
+ */ static void -nmhandle_detach_cb(isc_nmhandle_t **handlep); +nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG); int isc_nm_tid(void) { @@ -163,6 +169,22 @@ isc__nm_in_netthread(void) { return (isc__nm_tid_v >= 0); } +static bool +isc__nm_test_lb_socket(sa_family_t sa_family, int protocol) { + isc_result_t result; + uv_os_sock_t fd = -1; + + result = isc__nm_socket(sa_family, protocol, 0, &fd); + REQUIRE(result == ISC_R_SUCCESS); + + result = isc__nm_socket_reuse_lb(fd); + REQUIRE(result == ISC_R_SUCCESS || result == ISC_R_NOTIMPLEMENTED); + + isc__nm_closesocket(fd); + + return (result == ISC_R_SUCCESS); +} + isc_nm_t * isc_nm_start(isc_mem_t *mctx, uint32_t workers) { isc_nm_t *mgr = NULL; @@ -170,6 +192,14 @@ isc_nm_start(isc_mem_t *mctx, uint32_t workers) { isc__nm_tls_initialize(); + if (!isc__nm_test_lb_socket(AF_INET, SOCK_DGRAM) || + !isc__nm_test_lb_socket(AF_INET, SOCK_STREAM) || + !isc__nm_test_lb_socket(AF_INET6, SOCK_DGRAM) || + !isc__nm_test_lb_socket(AF_INET6, SOCK_STREAM)) + { + workers = 1; + } + mgr = isc_mem_get(mctx, sizeof(*mgr)); *mgr = (isc_nm_t){ .nworkers = workers }; @@ -265,8 +295,7 @@ nm_destroy(isc_nm_t **mgr0) { for (size_t i = 0; i < mgr->nworkers; i++) { isc__networker_t *worker = &mgr->workers[i]; - isc__netievent_t *event = isc__nm_get_ievent(mgr, - netievent_stop); + isc__netievent_t *event = isc__nm_get_netievent_stop(mgr); isc__nm_enqueue_ievent(worker, event); } @@ -334,9 +363,9 @@ isc_nm_pause(isc_nm_t *mgr) { for (size_t i = 0; i < mgr->nworkers; i++) { isc__networker_t *worker = &mgr->workers[i]; - isc__netievent_t *event = isc__nm_get_ievent(mgr, - netievent_pause); - isc__nm_enqueue_ievent(worker, event); + isc__netievent_resume_t *event = + isc__nm_get_netievent_pause(mgr); + isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event); } LOCK(&mgr->lock); @@ -353,9 +382,9 @@ isc_nm_resume(isc_nm_t *mgr) { for (size_t i = 0; i < mgr->nworkers; i++) { isc__networker_t *worker = &mgr->workers[i]; - isc__netievent_t *event 
= isc__nm_get_ievent(mgr, - netievent_resume); - isc__nm_enqueue_ievent(worker, event); + isc__netievent_resume_t *event = + isc__nm_get_netievent_resume(mgr); + isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event); } LOCK(&mgr->lock); @@ -399,7 +428,7 @@ isc_nm_closedown(isc_nm_t *mgr) { atomic_store(&mgr->closing, true); for (size_t i = 0; i < mgr->nworkers; i++) { isc__netievent_t *event = NULL; - event = isc__nm_get_ievent(mgr, netievent_shutdown); + event = isc__nm_get_netievent_shutdown(mgr); isc__nm_enqueue_ievent(&mgr->workers[i], event); } } @@ -436,6 +465,7 @@ isc_nm_destroy(isc_nm_t **mgr0) { #ifdef NETMGR_TRACE isc__nm_dump_active(mgr); #endif + INSIST(references == 1); /* @@ -452,8 +482,8 @@ isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp) { } void -isc_nm_tcp_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, - uint32_t keepalive, uint32_t advertised) { +isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, + uint32_t keepalive, uint32_t advertised) { REQUIRE(VALID_NM(mgr)); mgr->init = init * 100; @@ -463,8 +493,8 @@ isc_nm_tcp_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, } void -isc_nm_tcp_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, - uint32_t *keepalive, uint32_t *advertised) { +isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, + uint32_t *keepalive, uint32_t *advertised) { REQUIRE(VALID_NM(mgr)); if (initial != NULL) { @@ -561,7 +591,7 @@ async_cb(uv_async_t *handle) { } static void -isc__nm_async_stopcb(isc__networker_t *worker, isc__netievent_t *ev0) { +isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0) { UNUSED(ev0); worker->finished = true; /* Close the async handler */ @@ -570,7 +600,7 @@ isc__nm_async_stopcb(isc__networker_t *worker, isc__netievent_t *ev0) { } static void -isc__nm_async_pausecb(isc__networker_t *worker, isc__netievent_t *ev0) { +isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0) { UNUSED(ev0); REQUIRE(worker->paused == 
false); worker->paused = true; @@ -578,7 +608,7 @@ isc__nm_async_pausecb(isc__networker_t *worker, isc__netievent_t *ev0) { } static void -isc__nm_async_resumecb(isc__networker_t *worker, isc__netievent_t *ev0) { +isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0) { UNUSED(ev0); REQUIRE(worker->paused == true); worker->paused = false; @@ -602,150 +632,108 @@ process_queues(isc__networker_t *worker) { (void)process_normal_queue(worker); } +/* + * The two macros here generate the individual cases for the process_netievent() + * function. The NETIEVENT_CASE(type) macro is the common case, and + * NETIEVENT_CASE_NOMORE(type) is a macro that causes the loop in the + * process_queue() to stop, e.g. it's only used for the netievent that + * stops/pauses processing the enqueued netievents. + */ +#define NETIEVENT_CASE(type) \ + case netievent_##type: { \ + isc__nm_async_##type(worker, ievent); \ + isc__nm_put_netievent_##type( \ + worker->mgr, (isc__netievent_##type##_t *)ievent); \ + return (true); \ + } + +#define NETIEVENT_CASE_NOMORE(type) \ + case netievent_##type: { \ + isc__nm_async_##type(worker, ievent); \ + isc__nm_put_netievent_##type(worker->mgr, ievent); \ + return (false); \ + } + +static bool +process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) { + REQUIRE(worker->id == isc_nm_tid()); + + switch (ievent->type) { + /* Don't process more ievents when we are stopping */ + NETIEVENT_CASE_NOMORE(stop); + + NETIEVENT_CASE(udpconnect); + NETIEVENT_CASE(udplisten); + NETIEVENT_CASE(udpstop); + NETIEVENT_CASE(udpsend); + NETIEVENT_CASE(udpread); + NETIEVENT_CASE(udpcancel); + NETIEVENT_CASE(udpclose); + + NETIEVENT_CASE(tcpaccept); + NETIEVENT_CASE(tcpconnect); + NETIEVENT_CASE(tcplisten); + NETIEVENT_CASE(tcpstartread); + NETIEVENT_CASE(tcppauseread); + NETIEVENT_CASE(tcpsend); + NETIEVENT_CASE(tcpstop); + NETIEVENT_CASE(tcpcancel); + NETIEVENT_CASE(tcpclose); + + NETIEVENT_CASE(tcpdnsaccept); + NETIEVENT_CASE(tcpdnslisten); + 
NETIEVENT_CASE(tcpdnsconnect); + NETIEVENT_CASE(tcpdnssend); + NETIEVENT_CASE(tcpdnscancel); + NETIEVENT_CASE(tcpdnsclose); + NETIEVENT_CASE(tcpdnsread); + NETIEVENT_CASE(tcpdnsstop); + + NETIEVENT_CASE(tlsstartread); + NETIEVENT_CASE(tlssend); + NETIEVENT_CASE(tlsclose); + NETIEVENT_CASE(tlsconnect); + NETIEVENT_CASE(tlsdobio); + + NETIEVENT_CASE(tlsdnssend); + NETIEVENT_CASE(tlsdnscancel); + NETIEVENT_CASE(tlsdnsclose); + NETIEVENT_CASE(tlsdnsread); + NETIEVENT_CASE(tlsdnsstop); + + NETIEVENT_CASE(connectcb); + NETIEVENT_CASE(readcb); + NETIEVENT_CASE(sendcb); + + NETIEVENT_CASE(close); + NETIEVENT_CASE(detach); + + NETIEVENT_CASE(shutdown); + NETIEVENT_CASE(resume); + NETIEVENT_CASE_NOMORE(pause); + + default: + INSIST(0); + ISC_UNREACHABLE(); + } + return (true); +} + static bool process_queue(isc__networker_t *worker, isc_queue_t *queue) { isc__netievent_t *ievent = NULL; - bool more = true; while ((ievent = (isc__netievent_t *)isc_queue_dequeue(queue)) != NULL) { - switch (ievent->type) { - case netievent_stop: - isc__nm_async_stopcb(worker, ievent); - /* Don't process more ievents when we are stopping */ - more = false; - break; - - case netievent_udpconnect: - isc__nm_async_udpconnect(worker, ievent); - break; - case netievent_udplisten: - isc__nm_async_udplisten(worker, ievent); - break; - case netievent_udpstop: - isc__nm_async_udpstop(worker, ievent); - break; - case netievent_udpsend: - isc__nm_async_udpsend(worker, ievent); - break; - case netievent_udpread: - isc__nm_async_udpread(worker, ievent); - break; - case netievent_udpcancel: - isc__nm_async_udpcancel(worker, ievent); - break; - case netievent_udpclose: - isc__nm_async_udpclose(worker, ievent); - break; - - case netievent_tcpconnect: - isc__nm_async_tcpconnect(worker, ievent); - break; - case netievent_tcplisten: - isc__nm_async_tcplisten(worker, ievent); - break; - case netievent_tcpchildaccept: - isc__nm_async_tcpchildaccept(worker, ievent); - break; - case netievent_tcpaccept: - 
isc__nm_async_tcpaccept(worker, ievent); - break; - case netievent_tcpstartread: - isc__nm_async_tcp_startread(worker, ievent); - break; - case netievent_tcppauseread: - isc__nm_async_tcp_pauseread(worker, ievent); - break; - case netievent_tcpsend: - isc__nm_async_tcpsend(worker, ievent); - break; - case netievent_tcpdnssend: - isc__nm_async_tcpdnssend(worker, ievent); - break; - case netievent_tcpstop: - isc__nm_async_tcpstop(worker, ievent); - break; - case netievent_tcpcancel: - isc__nm_async_tcpcancel(worker, ievent); - break; - case netievent_tcpclose: - isc__nm_async_tcpclose(worker, ievent); - break; - case netievent_tcpdnscancel: - isc__nm_async_tcpdnscancel(worker, ievent); - break; - case netievent_tcpdnsclose: - isc__nm_async_tcpdnsclose(worker, ievent); - break; - case netievent_tcpdnsread: - isc__nm_async_tcpdnsread(worker, ievent); - break; - case netievent_tcpdnsstop: - isc__nm_async_tcpdnsstop(worker, ievent); - break; - - case netievent_tlsstartread: - isc__nm_async_tls_startread(worker, ievent); - break; - case netievent_tlssend: - isc__nm_async_tlssend(worker, ievent); - break; - case netievent_tlsclose: - isc__nm_async_tlsclose(worker, ievent); - break; - case netievent_tlsconnect: - isc__nm_async_tlsconnect(worker, ievent); - break; - case netievent_tlsdobio: - isc__nm_async_tls_do_bio(worker, ievent); - break; - - case netievent_connectcb: - isc__nm_async_connectcb(worker, ievent); - break; - case netievent_acceptcb: - isc__nm_async_acceptcb(worker, ievent); - break; - case netievent_readcb: - isc__nm_async_readcb(worker, ievent); - break; - case netievent_sendcb: - isc__nm_async_sendcb(worker, ievent); - break; - case netievent_closecb: - isc__nm_async_closecb(worker, ievent); - break; - - case netievent_detach: - isc__nm_async_detach(worker, ievent); - break; - case netievent_shutdown: - isc__nm_async_shutdown(worker, ievent); - break; - - case netievent_resume: - isc__nm_async_resumecb(worker, ievent); - break; - case netievent_pause: - 
isc__nm_async_pausecb(worker, ievent); - /* Don't process more ievents when we are pausing */ - more = false; - break; - default: - INSIST(0); - ISC_UNREACHABLE(); - } - - isc__nm_put_ievent(worker->mgr, ievent); - if (!more) { - break; + if (!process_netievent(worker, ievent)) { + return (false); } } - return (more); + return (true); } void * -isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type) { +isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type) { isc__netievent_storage_t *event = isc_mempool_get(mgr->evpool); *event = (isc__netievent_storage_t){ .ni.type = type }; @@ -753,10 +741,76 @@ isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type) { } void -isc__nm_put_ievent(isc_nm_t *mgr, void *ievent) { +isc__nm_put_netievent(isc_nm_t *mgr, void *ievent) { isc_mempool_put(mgr->evpool, ievent); } +NETIEVENT_SOCKET_DEF(tcpclose); +NETIEVENT_SOCKET_DEF(tcplisten); +NETIEVENT_SOCKET_DEF(tcppauseread); +NETIEVENT_SOCKET_DEF(tcpstartread); +NETIEVENT_SOCKET_DEF(tcpstop); +NETIEVENT_SOCKET_DEF(tlsclose); +NETIEVENT_SOCKET_DEF(tlsconnect); +NETIEVENT_SOCKET_DEF(tlsdobio); +NETIEVENT_SOCKET_DEF(tlsstartread); +NETIEVENT_SOCKET_DEF(udpclose); +NETIEVENT_SOCKET_DEF(udplisten); +NETIEVENT_SOCKET_DEF(udpread); +NETIEVENT_SOCKET_DEF(udpsend); +NETIEVENT_SOCKET_DEF(udpstop); + +NETIEVENT_SOCKET_DEF(tcpdnsclose); +NETIEVENT_SOCKET_DEF(tcpdnsread); +NETIEVENT_SOCKET_DEF(tcpdnsstop); +NETIEVENT_SOCKET_DEF(tcpdnslisten); +NETIEVENT_SOCKET_REQ_DEF(tcpdnsconnect); +NETIEVENT_SOCKET_REQ_DEF(tcpdnssend); +NETIEVENT_SOCKET_HANDLE_DEF(tcpdnscancel); +NETIEVENT_SOCKET_QUOTA_DEF(tcpdnsaccept); + +NETIEVENT_SOCKET_DEF(tlsdnsclose); +NETIEVENT_SOCKET_DEF(tlsdnsread); +NETIEVENT_SOCKET_DEF(tlsdnsstop); +NETIEVENT_SOCKET_REQ_DEF(tlsdnssend); +NETIEVENT_SOCKET_HANDLE_DEF(tlsdnscancel); + +NETIEVENT_SOCKET_REQ_DEF(tcpconnect); +NETIEVENT_SOCKET_REQ_DEF(tcpsend); +NETIEVENT_SOCKET_REQ_DEF(tlssend); +NETIEVENT_SOCKET_REQ_DEF(udpconnect); + 
+NETIEVENT_SOCKET_REQ_RESULT_DEF(connectcb); +NETIEVENT_SOCKET_REQ_RESULT_DEF(readcb); +NETIEVENT_SOCKET_REQ_RESULT_DEF(sendcb); + +NETIEVENT_SOCKET_DEF(detach); +NETIEVENT_SOCKET_HANDLE_DEF(tcpcancel); +NETIEVENT_SOCKET_HANDLE_DEF(udpcancel); + +NETIEVENT_SOCKET_QUOTA_DEF(tcpaccept); + +NETIEVENT_SOCKET_DEF(close); +NETIEVENT_DEF(pause); +NETIEVENT_DEF(resume); +NETIEVENT_DEF(shutdown); +NETIEVENT_DEF(stop); + +void +isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, + isc__netievent_t *event) { + /* + * If we are already in the matching nmthread, process the ievent + * directly. + */ + if (worker->id == isc_nm_tid()) { + process_netievent(worker, event); + return; + } + + isc__nm_enqueue_ievent(worker, event); +} + void isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) { if (event->type > netievent_prio) { @@ -798,17 +852,24 @@ isc__nmsocket_deactivate(isc_nmsocket_t *sock) { } void -isc__nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target) { +isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(target != NULL && *target == NULL); + isc_nmsocket_t *rsock = NULL; + if (sock->parent != NULL) { - INSIST(sock->parent->parent == NULL); /* sanity check */ - isc_refcount_increment0(&sock->parent->references); + rsock = sock->parent; + INSIST(rsock->parent == NULL); /* sanity check */ } else { - isc_refcount_increment0(&sock->references); + rsock = sock; } + NETMGR_TRACE_LOG("isc__nmsocket_attach():%p->references = %lu\n", rsock, + isc_refcount_current(&rsock->references) + 1); + + isc_refcount_increment0(&rsock->references); + *target = sock; } @@ -816,13 +877,16 @@ isc__nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target) { * Free all resources inside a socket (including its children if any). 
*/ static void -nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { +nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree FLARG) { isc_nmhandle_t *handle = NULL; isc__nm_uvreq_t *uvreq = NULL; REQUIRE(VALID_NMSOCK(sock)); REQUIRE(!isc__nmsocket_active(sock)); + NETMGR_TRACE_LOG("nmsocket_cleanup():%p->references = %lu\n", sock, + isc_refcount_current(&sock->references)); + atomic_store(&sock->destroying, true); if (sock->parent == NULL && sock->children != NULL) { @@ -832,7 +896,8 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { */ for (int i = 0; i < sock->nchildren; i++) { if (!atomic_load(&sock->children[i].destroying)) { - nmsocket_cleanup(&sock->children[i], false); + nmsocket_cleanup(&sock->children[i], + false FLARG_PASS); } } @@ -851,11 +916,11 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { sock->statichandle = NULL; if (sock->outerhandle != NULL) { - isc_nmhandle_detach(&sock->outerhandle); + isc__nmhandle_detach(&sock->outerhandle FLARG_PASS); } if (sock->outer != NULL) { - isc__nmsocket_detach(&sock->outer); + isc___nmsocket_detach(&sock->outer FLARG_PASS); } while ((handle = isc_astack_pop(sock->inactivehandles)) != NULL) { @@ -894,6 +959,7 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { isc_mem_free(sock->mgr->mctx, sock->ah_handles); isc_mutex_destroy(&sock->lock); isc_condition_destroy(&sock->cond); + isc_condition_destroy(&sock->scond); #ifdef NETMGR_TRACE LOCK(&sock->mgr->lock); ISC_LIST_UNLINK(sock->mgr->active_sockets, sock, active_link); @@ -909,7 +975,7 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { } static void -nmsocket_maybe_destroy(isc_nmsocket_t *sock) { +nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG) { int active_handles; bool destroy = false; @@ -919,7 +985,7 @@ nmsocket_maybe_destroy(isc_nmsocket_t *sock) { * as a side effect of destroying the parent, so let's go * see if the parent is ready to be destroyed. 
*/ - nmsocket_maybe_destroy(sock->parent); + nmsocket_maybe_destroy(sock->parent FLARG_PASS); return; } @@ -949,19 +1015,25 @@ nmsocket_maybe_destroy(isc_nmsocket_t *sock) { destroy = true; } + NETMGR_TRACE_LOG("%s:%p->active_handles = %d, .statichandle = %p\n", + __func__, sock, active_handles, sock->statichandle); + if (destroy) { atomic_store(&sock->destroying, true); UNLOCK(&sock->lock); - nmsocket_cleanup(sock, true); + nmsocket_cleanup(sock, true FLARG_PASS); } else { UNLOCK(&sock->lock); } } void -isc__nmsocket_prep_destroy(isc_nmsocket_t *sock) { +isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG) { REQUIRE(sock->parent == NULL); + NETMGR_TRACE_LOG("isc___nmsocket_prep_destroy():%p->references = %lu\n", + sock, isc_refcount_current(&sock->references)); + /* * The final external reference to the socket is gone. We can try * destroying the socket, but we have to wait for all the inflight @@ -999,16 +1071,19 @@ isc__nmsocket_prep_destroy(isc_nmsocket_t *sock) { case isc_nm_tlssocket: isc__nm_tls_close(sock); break; + case isc_nm_tlsdnssocket: + isc__nm_tlsdns_close(sock); + return; default: break; } } - nmsocket_maybe_destroy(sock); + nmsocket_maybe_destroy(sock FLARG_PASS); } void -isc__nmsocket_detach(isc_nmsocket_t **sockp) { +isc___nmsocket_detach(isc_nmsocket_t **sockp FLARG) { REQUIRE(sockp != NULL && *sockp != NULL); REQUIRE(VALID_NMSOCK(*sockp)); @@ -1026,8 +1101,11 @@ isc__nmsocket_detach(isc_nmsocket_t **sockp) { rsock = sock; } + NETMGR_TRACE_LOG("isc__nmsocket_detach():%p->references = %lu\n", rsock, + isc_refcount_current(&rsock->references) - 1); + if (isc_refcount_decrement(&rsock->references) == 1) { - isc__nmsocket_prep_destroy(rsock); + isc___nmsocket_prep_destroy(rsock FLARG_PASS); } } @@ -1037,14 +1115,15 @@ isc_nmsocket_close(isc_nmsocket_t **sockp) { REQUIRE(VALID_NMSOCK(*sockp)); REQUIRE((*sockp)->type == isc_nm_udplistener || (*sockp)->type == isc_nm_tcplistener || - (*sockp)->type == isc_nm_tcpdnslistener); + (*sockp)->type == 
isc_nm_tcpdnslistener || + (*sockp)->type == isc_nm_tlsdnslistener); isc__nmsocket_detach(sockp); } void -isc__nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, - isc_nmiface_t *iface) { +isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, + isc_nmiface_t *iface FLARG) { uint16_t family; REQUIRE(sock != NULL); @@ -1096,6 +1175,10 @@ isc__nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, break; case isc_nm_tcpsocket: case isc_nm_tcplistener: + case isc_nm_tcpdnssocket: + case isc_nm_tcpdnslistener: + case isc_nm_tlsdnssocket: + case isc_nm_tlsdnslistener: if (family == AF_INET) { sock->statsindex = tcp4statsindex; } else { @@ -1109,15 +1192,19 @@ isc__nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, isc_mutex_init(&sock->lock); isc_condition_init(&sock->cond); + isc_condition_init(&sock->scond); isc_refcount_init(&sock->references, 1); + NETMGR_TRACE_LOG("isc__nmsocket_init():%p->references = %lu\n", sock, + isc_refcount_current(&sock->references)); + atomic_init(&sock->active, true); atomic_init(&sock->sequential, false); - atomic_init(&sock->overlimit, false); - atomic_init(&sock->processing, false); atomic_init(&sock->readpaused, false); atomic_init(&sock->closing, false); + atomic_store(&sock->active_child_connections, 0); + sock->magic = NMSOCK_MAGIC; } @@ -1172,8 +1259,8 @@ alloc_handle(isc_nmsocket_t *sock) { } isc_nmhandle_t * -isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, - isc_sockaddr_t *local) { +isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, + isc_sockaddr_t *local FLARG) { isc_nmhandle_t *handle = NULL; size_t handlenum; int pos; @@ -1189,7 +1276,10 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, INSIST(VALID_NMHANDLE(handle)); } - isc__nmsocket_attach(sock, &handle->sock); + NETMGR_TRACE_LOG("isc__nmhandle_get():handle %p->references = %lu\n", + handle, isc_refcount_current(&handle->references)); + + 
isc___nmsocket_attach(sock, &handle->sock FLARG_PASS); #ifdef NETMGR_TRACE handle->backtrace_size = backtrace(handle->backtrace, TRACE_SIZE); @@ -1242,7 +1332,8 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, if (sock->type == isc_nm_tcpsocket || sock->type == isc_nm_tlssocket || (sock->type == isc_nm_udpsocket && atomic_load(&sock->client)) || - (sock->type == isc_nm_tcpdnssocket && atomic_load(&sock->client))) + (sock->type == isc_nm_tcpdnssocket && atomic_load(&sock->client)) || + (sock->type == isc_nm_tlsdnssocket && atomic_load(&sock->client))) { INSIST(sock->statichandle == NULL); @@ -1259,10 +1350,13 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, } void -isc_nmhandle_attach(isc_nmhandle_t *handle, isc_nmhandle_t **handlep) { +isc__nmhandle_attach(isc_nmhandle_t *handle, isc_nmhandle_t **handlep FLARG) { REQUIRE(VALID_NMHANDLE(handle)); REQUIRE(handlep != NULL && *handlep == NULL); + NETMGR_TRACE_LOG("isc__nmhandle_attach():handle %p->references = %lu\n", + handle, isc_refcount_current(&handle->references) + 1); + isc_refcount_increment(&handle->references); *handlep = handle; } @@ -1272,7 +1366,9 @@ isc_nmhandle_is_stream(isc_nmhandle_t *handle) { REQUIRE(VALID_NMHANDLE(handle)); return (handle->sock->type == isc_nm_tcpsocket || - handle->sock->type == isc_nm_tcpdnssocket); + handle->sock->type == isc_nm_tcpdnssocket || + handle->sock->type == isc_nm_tlssocket || + handle->sock->type == isc_nm_tlsdnssocket); } static void @@ -1324,7 +1420,7 @@ nmhandle_deactivate(isc_nmsocket_t *sock, isc_nmhandle_t *handle) { } void -isc_nmhandle_detach(isc_nmhandle_t **handlep) { +isc__nmhandle_detach(isc_nmhandle_t **handlep FLARG) { isc_nmsocket_t *sock = NULL; isc_nmhandle_t *handle = NULL; @@ -1336,19 +1432,23 @@ isc_nmhandle_detach(isc_nmhandle_t **handlep) { sock = handle->sock; if (sock->tid == isc_nm_tid()) { - nmhandle_detach_cb(&handle); + nmhandle_detach_cb(&handle FLARG_PASS); } else { isc__netievent_detach_t *event = - 
isc__nm_get_ievent(sock->mgr, netievent_detach); - event->handle = handle; /* implict attach */ - isc__nmsocket_attach(sock, &event->sock); + isc__nm_get_netievent_detach(sock->mgr, sock); + /* + * we are using implicit "attach" as the last reference + * needs to be destroyed explicitly in the async callback + */ + event->handle = handle; + FLARG_IEVENT_PASS(event); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)event); } } static void -nmhandle_detach_cb(isc_nmhandle_t **handlep) +nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG) { isc_nmsocket_t *sock = NULL; isc_nmhandle_t *handle = NULL; @@ -1358,6 +1458,9 @@ nmhandle_detach_cb(isc_nmhandle_t **handlep) { handle = *handlep; *handlep = NULL; + NETMGR_TRACE_LOG("isc__nmhandle_detach():%p->references = %lu\n", + handle, isc_refcount_current(&handle->references) - 1); + if (isc_refcount_decrement(&handle->references) > 1) { return; } @@ -1383,13 +1486,8 @@ nmhandle_detach_cb(isc_nmhandle_t **handlep) { if (sock->tid == isc_nm_tid()) { sock->closehandle_cb(sock); } else { - isc__netievent_closecb_t *event = isc__nm_get_ievent( - sock->mgr, netievent_closecb); - /* - * The socket will be finally detached by the closecb - * event handler. 
- */ - isc__nmsocket_attach(sock, &event->sock); + isc__netievent_close_t *event = + isc__nm_get_netievent_close(sock->mgr, sock); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)event); } @@ -1400,7 +1498,7 @@ nmhandle_detach_cb(isc_nmhandle_t **handlep) { sock->statichandle = NULL; } - isc__nmsocket_detach(&sock); + isc___nmsocket_detach(&sock FLARG_PASS); } void * @@ -1434,6 +1532,9 @@ isc_nmhandle_settimeout(isc_nmhandle_t *handle, uint32_t timeout) { case isc_nm_tcpdnssocket: isc__nm_tcpdns_settimeout(handle, timeout); break; + case isc_nm_tlsdnssocket: + isc__nm_tlsdns_settimeout(handle, timeout); + break; default: INSIST(0); ISC_UNREACHABLE(); @@ -1470,7 +1571,7 @@ isc_nmhandle_netmgr(isc_nmhandle_t *handle) { } isc__nm_uvreq_t * -isc__nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock) { +isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG) { isc__nm_uvreq_t *req = NULL; REQUIRE(VALID_NM(mgr)); @@ -1488,14 +1589,14 @@ isc__nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock) { *req = (isc__nm_uvreq_t){ .magic = 0 }; ISC_LINK_INIT(req, link); req->uv_req.req.data = req; - isc__nmsocket_attach(sock, &req->sock); + isc___nmsocket_attach(sock, &req->sock FLARG_PASS); req->magic = UVREQ_MAGIC; return (req); } void -isc__nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock) { +isc___nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock FLARG) { isc__nm_uvreq_t *req = NULL; isc_nmhandle_t *handle = NULL; @@ -1522,10 +1623,10 @@ isc__nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock) { } if (handle != NULL) { - isc_nmhandle_detach(&handle); + isc__nmhandle_detach(&handle FLARG_PASS); } - isc__nmsocket_detach(&sock); + isc___nmsocket_detach(&sock FLARG_PASS); } void @@ -1547,6 +1648,9 @@ isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, case isc_nm_tlssocket: isc__nm_tls_send(handle, region, cb, cbarg); break; + case isc_nm_tlsdnssocket: + isc__nm_tlsdns_send(handle, region, cb, cbarg); + break; 
default: INSIST(0); ISC_UNREACHABLE(); @@ -1557,6 +1661,13 @@ void isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { REQUIRE(VALID_NMHANDLE(handle)); + /* + * This is always called via callback (from accept or connect), and + * caller must attach to the handle, so the references always need to be + * at least 2. + */ + REQUIRE(isc_refcount_current(&handle->references) >= 2); + switch (handle->sock->type) { case isc_nm_udpsocket: isc__nm_udp_read(handle, cb, cbarg); @@ -1570,6 +1681,9 @@ isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { case isc_nm_tlssocket: isc__nm_tls_read(handle, cb, cbarg); break; + case isc_nm_tlsdnssocket: + isc__nm_tlsdns_read(handle, cb, cbarg); + break; default: INSIST(0); ISC_UNREACHABLE(); @@ -1590,6 +1704,9 @@ isc_nm_cancelread(isc_nmhandle_t *handle) { case isc_nm_tcpdnssocket: isc__nm_tcpdns_cancelread(handle); break; + case isc_nm_tlsdnssocket: + isc__nm_tlsdns_cancelread(handle); + break; default: INSIST(0); ISC_UNREACHABLE(); @@ -1604,10 +1721,10 @@ isc_nm_pauseread(isc_nmhandle_t *handle) { switch (sock->type) { case isc_nm_tcpsocket: - isc__nm_tcp_pauseread(sock); + isc__nm_tcp_pauseread(handle); break; case isc_nm_tlssocket: - isc__nm_tls_pauseread(sock); + isc__nm_tls_pauseread(handle); break; default: INSIST(0); @@ -1623,10 +1740,10 @@ isc_nm_resumeread(isc_nmhandle_t *handle) { switch (sock->type) { case isc_nm_tcpsocket: - isc__nm_tcp_resumeread(sock); + isc__nm_tcp_resumeread(handle); break; case isc_nm_tlssocket: - isc__nm_tls_resumeread(sock); + isc__nm_tls_resumeread(handle); break; default: INSIST(0); @@ -1651,6 +1768,9 @@ isc_nm_stoplistening(isc_nmsocket_t *sock) { case isc_nm_tlslistener: isc__nm_tls_stoplistening(sock); break; + case isc_nm_tlsdnslistener: + isc__nm_tlsdns_stoplistening(sock); + break; default: INSIST(0); ISC_UNREACHABLE(); @@ -1660,21 +1780,16 @@ isc_nm_stoplistening(isc_nmsocket_t *sock) { void isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t 
*uvreq, isc_result_t eresult) { - isc__netievent_connectcb_t *ievent = - isc__nm_get_ievent(sock->mgr, netievent_connectcb); - REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_UVREQ(uvreq)); REQUIRE(VALID_NMHANDLE(uvreq->handle)); - ievent->sock = sock; - ievent->req = uvreq; - ievent->result = eresult; + isc__netievent_connectcb_t *ievent = isc__nm_get_netievent_connectcb( + sock->mgr, sock, uvreq, eresult); if (eresult == ISC_R_SUCCESS) { - isc__nm_async_connectcb(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - isc__nm_put_ievent(sock->mgr, ievent); + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); } else { isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); @@ -1701,68 +1816,20 @@ isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_uvreq_put(&uvreq, sock); } -void -isc__nm_acceptcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq, - isc_result_t eresult) { - isc__netievent_acceptcb_t *ievent = - isc__nm_get_ievent(sock->mgr, netievent_acceptcb); - - REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(VALID_UVREQ(uvreq)); - REQUIRE(VALID_NMHANDLE(uvreq->handle)); - - ievent->sock = sock; - ievent->req = uvreq; - ievent->result = eresult; - - if (eresult == ISC_R_SUCCESS) { - isc__nm_async_acceptcb(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - isc__nm_put_ievent(sock->mgr, ievent); - } else { - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - } -} - -void -isc__nm_async_acceptcb(isc__networker_t *worker, isc__netievent_t *ev0) { - isc__netievent_acceptcb_t *ievent = (isc__netievent_acceptcb_t *)ev0; - isc_nmsocket_t *sock = ievent->sock; - isc__nm_uvreq_t *uvreq = ievent->req; - isc_result_t eresult = ievent->result; - - UNUSED(worker); - - REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(VALID_UVREQ(uvreq)); - REQUIRE(VALID_NMHANDLE(uvreq->handle)); - REQUIRE(sock->tid == isc_nm_tid()); - REQUIRE(uvreq->cb.accept != 
NULL); - - uvreq->cb.accept(uvreq->handle, eresult, uvreq->cbarg); - - isc__nm_uvreq_put(&uvreq, sock); -} - void isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq, isc_result_t eresult) { - isc__netievent_readcb_t *ievent = isc__nm_get_ievent(sock->mgr, - netievent_readcb); - REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_UVREQ(uvreq)); REQUIRE(VALID_NMHANDLE(uvreq->handle)); - ievent->sock = sock; - ievent->req = uvreq; - ievent->result = eresult; + isc__netievent_readcb_t *ievent = + isc__nm_get_netievent_readcb(sock->mgr, sock, uvreq, eresult); if (eresult == ISC_R_SUCCESS) { - isc__nm_async_readcb(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - isc__nm_put_ievent(sock->mgr, ievent); + REQUIRE(sock->tid == isc_nm_tid()); + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); } else { isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); @@ -1793,21 +1860,17 @@ isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0) { void isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq, isc_result_t eresult) { - isc__netievent_sendcb_t *ievent = isc__nm_get_ievent(sock->mgr, - netievent_sendcb); - REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_UVREQ(uvreq)); REQUIRE(VALID_NMHANDLE(uvreq->handle)); - ievent->sock = sock; - ievent->req = uvreq; - ievent->result = eresult; + isc__netievent_sendcb_t *ievent = + isc__nm_get_netievent_sendcb(sock->mgr, sock, uvreq, eresult); if (eresult == ISC_R_SUCCESS) { - isc__nm_async_sendcb(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - isc__nm_put_ievent(sock->mgr, ievent); + REQUIRE(sock->tid == isc_nm_tid()); + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); } else { isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); @@ -1833,9 +1896,9 @@ isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_uvreq_put(&uvreq, 
sock); } -void -isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ev0) { - isc__netievent_closecb_t *ievent = (isc__netievent_closecb_t *)ev0; +static void +isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_close_t *ievent = (isc__netievent_close_t *)ev0; isc_nmsocket_t *sock = ievent->sock; REQUIRE(VALID_NMSOCK(ievent->sock)); @@ -1845,21 +1908,20 @@ isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ev0) { UNUSED(worker); ievent->sock->closehandle_cb(sock); - isc__nmsocket_detach(&sock); } void isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_detach_t *ievent = (isc__netievent_detach_t *)ev0; + FLARG_IEVENT(ievent); REQUIRE(VALID_NMSOCK(ievent->sock)); + REQUIRE(VALID_NMHANDLE(ievent->handle)); REQUIRE(ievent->sock->tid == isc_nm_tid()); - REQUIRE(ievent->handle != NULL); UNUSED(worker); - isc__nmsocket_detach(&ievent->sock); - nmhandle_detach_cb(&ievent->handle); + nmhandle_detach_cb(&ievent->handle FLARG_PASS); } static void @@ -1873,15 +1935,34 @@ shutdown_walk_cb(uv_handle_t *handle, void *arg) { switch (handle->type) { case UV_UDP: - REQUIRE(VALID_NMSOCK(sock)); - isc__nm_udp_shutdown(sock); - break; case UV_TCP: - REQUIRE(VALID_NMSOCK(sock)); - isc__nm_tcp_shutdown(sock); break; default: + return; + } + + REQUIRE(VALID_NMSOCK(sock)); + switch (sock->type) { + case isc_nm_udpsocket: + isc__nm_udp_shutdown(sock); break; + case isc_nm_tcpsocket: + isc__nm_tcp_shutdown(sock); + break; + case isc_nm_tcpdnssocket: + isc__nm_tcpdns_shutdown(sock); + break; + case isc_nm_tlsdnssocket: + /* dummy now */ + break; + case isc_nm_udplistener: + case isc_nm_tcplistener: + case isc_nm_tcpdnslistener: + case isc_nm_tlsdnslistener: + return; + default: + INSIST(0); + ISC_UNREACHABLE(); } } @@ -1984,6 +2065,15 @@ isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp) { return (ISC_R_SUCCESS); } +void +isc__nm_closesocket(uv_os_sock_t sock) { +#ifdef 
WIN32 + closesocket(sock); +#else + close(sock); +#endif +} + #define setsockopt_on(socket, level, name) \ setsockopt(socket, level, name, &(int){ 1 }, sizeof(int)) @@ -2153,6 +2243,48 @@ isc__nm_socket_dontfrag(uv_os_sock_t fd, sa_family_t sa_family) { return (ISC_R_NOTIMPLEMENTED); } +#if defined(_WIN32) +#define TIMEOUT_TYPE DWORD +#define TIMEOUT_DIV 1000 +#define TIMEOUT_OPTNAME TCP_MAXRT +#elif defined(TCP_CONNECTIONTIMEOUT) +#define TIMEOUT_TYPE int +#define TIMEOUT_DIV 1000 +#define TIMEOUT_OPTNAME TCP_CONNECTIONTIMEOUT +#elif defined(TCP_RXT_CONNDROPTIME) +#define TIMEOUT_TYPE int +#define TIMEOUT_DIV 1000 +#define TIMEOUT_OPTNAME TCP_RXT_CONNDROPTIME +#elif defined(TCP_USER_TIMEOUT) +#define TIMEOUT_TYPE unsigned int +#define TIMEOUT_DIV 1 +#define TIMEOUT_OPTNAME TCP_USER_TIMEOUT +#endif + +isc_result_t +isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) { +#if defined(TIMEOUT_OPTNAME) + TIMEOUT_TYPE timeout = timeout_ms / TIMEOUT_DIV; + + if (timeout == 0) { + timeout = 1; + } + + if (setsockopt(fd, IPPROTO_TCP, TIMEOUT_OPTNAME, &timeout, + sizeof(timeout)) == -1) + { + return (ISC_R_FAILURE); + } + + return (ISC_R_SUCCESS); +#else + UNUSED(fd); + UNUSED(timeout_ms); + + return (ISC_R_SUCCESS); +#endif +} + #ifdef NETMGR_TRACE /* * Dump all active sockets in netmgr. We output to stderr @@ -2178,6 +2310,10 @@ nmsocket_type_totext(isc_nmsocket_type type) { return ("isc_nm_tlssocket"); case isc_nm_tlslistener: return ("isc_nm_tlslistener"); + case isc_nm_tlsdnslistener: + return ("isc_nm_tlsdnslistener"); + case isc_nm_tlsdnssocket: + return ("isc_nm_tlsdnssocket"); default: INSIST(0); ISC_UNREACHABLE(); @@ -2200,11 +2336,18 @@ nmsocket_dump(isc_nmsocket_t *sock) { LOCK(&sock->lock); fprintf(stderr, "\n=================\n"); - fprintf(stderr, "Active socket %p, type %s, refs %lu\n", sock, + fprintf(stderr, "Active %s socket %p, type %s, refs %lu\n", + sock->client ? 
"client" : "server", sock, nmsocket_type_totext(sock->type), isc_refcount_current(&sock->references)); - fprintf(stderr, "Parent %p, listener %p, server %p\n", sock->parent, - sock->listener, sock->server); + fprintf(stderr, + "Parent %p, listener %p, server %p, statichandle = %p\n", + sock->parent, sock->listener, sock->server, sock->statichandle); + fprintf(stderr, "Flags:%s%s%s%s%s\n", sock->active ? " active" : "", + sock->closing ? " closing" : "", + sock->destroying ? " destroying" : "", + sock->connecting ? " connecting" : "", + sock->accepting ? " accepting" : ""); fprintf(stderr, "Created by:\n"); backtrace_symbols_fd(sock->backtrace, sock->backtrace_size, STDERR_FILENO); diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 4997d6f8de..0433da859d 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -69,8 +69,6 @@ read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf); static void tcp_close_cb(uv_handle_t *uvhandle); -static void -tcp_listenclose_cb(uv_handle_t *handle); static isc_result_t accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota); @@ -83,16 +81,38 @@ failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult); static void failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, isc_result_t eresult); +static void +stop_tcp_parent(isc_nmsocket_t *sock); +static void +stop_tcp_child(isc_nmsocket_t *sock); + +static void +start_sock_timer(isc_nmsocket_t *sock); + +static void +start_reading(isc_nmsocket_t *sock); + +static void +stop_reading(isc_nmsocket_t *sock); + +static isc__nm_uvreq_t * +get_read_req(isc_nmsocket_t *sock); + +static void +tcp_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf); static bool inactive(isc_nmsocket_t *sock) { - return (!isc__nmsocket_active(sock) || + return (!isc__nmsocket_active(sock) || atomic_load(&sock->closing) || atomic_load(&sock->mgr->closing) || (sock->server != NULL && !isc__nmsocket_active(sock->server))); } static void failed_accept_cb(isc_nmsocket_t 
*sock, isc_result_t eresult) { + REQUIRE(sock->accepting); + REQUIRE(sock->server); + /* * Detach the quota early to make room for other connections; * otherwise it'd be detached later asynchronously, and clog @@ -102,9 +122,8 @@ failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult) { isc_quota_detach(&sock->quota); } - if (!sock->accepting) { - return; - } + isc__nmsocket_detach(&sock->server); + sock->accepting = false; switch (eresult) { @@ -117,11 +136,6 @@ failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult) { "Accepting TCP connection failed: %s", isc_result_totext(eresult)); } - - /* - * Detach the socket properly to make sure uv_close() is called. - */ - isc__nmsocket_detach(&sock); } static void @@ -130,37 +144,15 @@ failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_UVREQ(req)); REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->connecting)); + REQUIRE(req->cb.connect != NULL); - if (sock->timer_running) { - uv_timer_stop(&sock->timer); - sock->timer_running = false; - } - - if (!atomic_load(&sock->connecting)) { - isc__nm_uvreq_put(&req, sock); - return; - } atomic_store(&sock->connecting, false); isc__nmsocket_clearcb(sock); - if (req->cb.connect != NULL) { - isc__nm_connectcb(sock, req, eresult); - } else { - isc__nm_uvreq_put(&req, sock); - } -} + isc__nm_connectcb(sock, req, eresult); -static void -connecttimeout_cb(uv_timer_t *handle) { - isc__nm_uvreq_t *req = uv_handle_get_data((uv_handle_t *)handle); - isc_nmsocket_t *sock = req->sock; - - REQUIRE(VALID_UVREQ(req)); - REQUIRE(VALID_NMHANDLE(req->handle)); - REQUIRE(sock->tid == isc_nm_tid()); - - failed_connect_cb(sock, req, ISC_R_TIMEDOUT); - isc__nmsocket_detach(&sock); + isc__nmsocket_prep_destroy(sock); } static isc_result_t @@ -179,49 +171,50 @@ tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { atomic_store(&sock->connecting, true); r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp); + 
RUNTIME_CHECK(r == 0); + uv_handle_set_data(&sock->uv_handle.handle, sock); + + r = uv_timer_init(&worker->loop, &sock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&sock->timer, sock); + + r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd); if (r != 0) { + isc__nm_closesocket(sock->fd); isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]); - atomic_store(&sock->closing, true); - atomic_store(&sock->closed, true); - atomic_store(&sock->active, false); - return (isc__nm_uverr2result(r)); + goto failure; } + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]); if (req->local.length != 0) { r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0); if (r != 0) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]); - atomic_store(&sock->active, false); - isc__nm_tcp_close(sock); - return (isc__nm_uverr2result(r)); + goto failure; } } - if (!sock->timer_initialized) { - uv_timer_init(&worker->loop, &sock->timer); - uv_handle_set_data((uv_handle_t *)&sock->timer, req); - sock->timer_initialized = true; - } - - uv_handle_set_data(&sock->uv_handle.handle, sock); uv_handle_set_data(&req->uv_req.handle, req); r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, &req->peer.type.sa, tcp_connect_cb); if (r != 0) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECTFAIL]); - atomic_store(&sock->active, false); - isc__nm_tcp_close(sock); - return (isc__nm_uverr2result(r)); + goto failure; } isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]); - uv_timer_start(&sock->timer, connecttimeout_cb, sock->connect_timeout, - 0); - sock->timer_running = true; + atomic_store(&sock->connected, true); return (ISC_R_SUCCESS); + +failure: + atomic_store(&sock->active, false); + + isc__nm_tcp_close(sock); + + return (isc__nm_uverr2result(r)); } void @@ -240,22 +233,27 @@ isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0) { REQUIRE(sock->parent == NULL); REQUIRE(sock->tid == isc_nm_tid()); - 
req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface->addr); result = tcp_connect_direct(sock, req); - atomic_store(&sock->result, result); if (result == ISC_R_SUCCESS) { atomic_store(&sock->connected, true); - /* uvreq will be freed in tcp_connect_cb */ - /* socket will be detached in tcp_connect_cb */ + /* The connect cb will be executed in tcp_connect_cb() */ } else { - atomic_store(&sock->connect_error, true); isc__nm_uvreq_put(&req, sock); - isc__nmsocket_detach(&ievent->sock); } LOCK(&sock->lock); + sock->result = result; SIGNAL(&sock->cond); + if (!atomic_load(&sock->active)) { + WAIT(&sock->scond, &sock->lock); + } + INSIST(atomic_load(&sock->active)); UNLOCK(&sock->lock); + + /* + * The sock is now attached to the handle. + */ + isc__nmsocket_detach(&sock); } static void @@ -268,35 +266,30 @@ tcp_connect_cb(uv_connect_t *uvreq, int status) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); - - /* We timed out */ - if (!atomic_load(&sock->connecting)) { - return; - } + REQUIRE(atomic_load(&sock->connecting)); req = uv_handle_get_data((uv_handle_t *)uvreq); REQUIRE(VALID_UVREQ(req)); REQUIRE(VALID_NMHANDLE(req->handle)); - if (sock->timer_running) { - uv_timer_stop(&sock->timer); - sock->timer_running = false; + /* Socket was closed midflight by isc__nm_tcp_shutdown() */ + if (!isc__nmsocket_active(sock)) { + result = ISC_R_CANCELED; + goto error; } if (status != 0) { - failed_connect_cb(sock, req, isc__nm_uverr2result(status)); - isc__nmsocket_detach(&sock); - return; + result = isc__nm_uverr2result(status); + goto error; } isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]); r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss, &(int){ sizeof(ss) }); if (r != 0) { - failed_connect_cb(sock, req, isc__nm_uverr2result(r)); - isc__nmsocket_detach(&sock); - return; + result = isc__nm_uverr2result(r); + goto error; } atomic_store(&sock->connecting, false); @@ -306,10 +299,10 @@ tcp_connect_cb(uv_connect_t 
*uvreq, int status) { isc__nm_connectcb(sock, req, ISC_R_SUCCESS); - /* - * The sock is now attached to the handle. - */ - isc__nmsocket_detach(&sock); + return; + +error: + failed_connect_cb(sock, req, result); } isc_result_t @@ -317,60 +310,93 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, isc_nm_cb_t cb, void *cbarg, unsigned int timeout, size_t extrahandlesize) { isc_result_t result = ISC_R_SUCCESS; - isc_nmsocket_t *sock = NULL, *tmp = NULL; + isc_nmsocket_t *sock = NULL; isc__netievent_tcpconnect_t *ievent = NULL; isc__nm_uvreq_t *req = NULL; + sa_family_t sa_family; + uv_os_sock_t fd; REQUIRE(VALID_NM(mgr)); REQUIRE(local != NULL); REQUIRE(peer != NULL); + sa_family = peer->addr.type.sa.sa_family; + + /* + * The socket() call can fail spuriously on FreeBSD 12, so we need to + * handle the failure early and gracefully. + */ + result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &fd); + if (result != ISC_R_SUCCESS) { + return (result); + } + sock = isc_mem_get(mgr->mctx, sizeof(*sock)); isc__nmsocket_init(sock, mgr, isc_nm_tcpsocket, local); + atomic_init(&sock->active, false); sock->extrahandlesize = extrahandlesize; sock->connect_timeout = timeout; - - atomic_init(&sock->result, ISC_R_SUCCESS); + sock->result = ISC_R_DEFAULT; + sock->fd = fd; atomic_init(&sock->client, true); + result = isc__nm_socket_connectiontimeout(fd, timeout); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + req = isc__nm_uvreq_get(mgr, sock); req->cb.connect = cb; req->cbarg = cbarg; req->peer = peer->addr; req->local = local->addr; + req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface->addr); - ievent = isc__nm_get_ievent(mgr, netievent_tcpconnect); - ievent->sock = sock; - ievent->req = req; - - /* - * Async callbacks can dereference the socket in the meantime, - * we need to hold an additional reference to it. 
- */ - isc__nmsocket_attach(sock, &tmp); + ievent = isc__nm_get_netievent_tcpconnect(mgr, sock, req); if (isc__nm_in_netthread()) { + atomic_store(&sock->active, true); sock->tid = isc_nm_tid(); isc__nm_async_tcpconnect(&mgr->workers[sock->tid], (isc__netievent_t *)ievent); - isc__nm_put_ievent(mgr, ievent); + isc__nm_put_netievent_tcpconnect(mgr, ievent); } else { sock->tid = isc_random_uniform(mgr->nworkers); isc__nm_enqueue_ievent(&mgr->workers[sock->tid], (isc__netievent_t *)ievent); - - LOCK(&sock->lock); - while (!atomic_load(&sock->connected) && - !atomic_load(&sock->connect_error)) { - WAIT(&sock->cond, &sock->lock); - } - UNLOCK(&sock->lock); } + LOCK(&sock->lock); + result = sock->result; + while (result == ISC_R_DEFAULT) { + WAIT(&sock->cond, &sock->lock); + result = sock->result; + } + atomic_store(&sock->active, true); + BROADCAST(&sock->scond); + UNLOCK(&sock->lock); + INSIST(result != ISC_R_DEFAULT); - result = atomic_load(&sock->result); + return (result); +} - isc__nmsocket_detach(&tmp); +static isc_result_t +isc__nm_tcp_lb_socket(sa_family_t sa_family, uv_os_sock_t *sockp) { + isc_result_t result; + uv_os_sock_t sock; + + result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock); + REQUIRE(result == ISC_R_SUCCESS); + + (void)isc__nm_socket_incoming_cpu(sock); + + /* FIXME: set mss */ + + result = isc__nm_socket_reuse(sock); + REQUIRE(result == ISC_R_SUCCESS || result == ISC_R_NOTIMPLEMENTED); + + result = isc__nm_socket_reuse_lb(sock); + REQUIRE(result == ISC_R_SUCCESS || result == ISC_R_NOTIMPLEMENTED); + + *sockp = sock; return (result); } @@ -380,160 +406,127 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_accept_cb_t accept_cb, void *accept_cbarg, size_t extrahandlesize, int backlog, isc_quota_t *quota, isc_nmsocket_t **sockp) { - isc_nmsocket_t *nsock = NULL; - isc__netievent_tcplisten_t *ievent = NULL; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *sock = NULL; + sa_family_t sa_family = 
iface->addr.type.sa.sa_family; + size_t children_size = 0; REQUIRE(VALID_NM(mgr)); - nsock = isc_mem_get(mgr->mctx, sizeof(*nsock)); - isc__nmsocket_init(nsock, mgr, isc_nm_tcplistener, iface); + sock = isc_mem_get(mgr->mctx, sizeof(*sock)); + isc__nmsocket_init(sock, mgr, isc_nm_tcplistener, iface); - nsock->accept_cb = accept_cb; - nsock->accept_cbarg = accept_cbarg; - nsock->extrahandlesize = extrahandlesize; - nsock->backlog = backlog; - atomic_init(&nsock->result, ISC_R_SUCCESS); - if (quota != NULL) { + sock->rchildren = 0; + sock->nchildren = mgr->nworkers; + children_size = sock->nchildren * sizeof(sock->children[0]); + sock->children = isc_mem_get(mgr->mctx, children_size); + memset(sock->children, 0, children_size); + + sock->result = ISC_R_DEFAULT; + sock->tid = isc_random_uniform(mgr->nworkers); + sock->fd = -1; + + for (size_t i = 0; i < mgr->nworkers; i++) { + isc__netievent_tcplisten_t *ievent = NULL; + isc_nmsocket_t *csock = &sock->children[i]; + + isc__nmsocket_init(csock, mgr, isc_nm_tcpsocket, iface); + csock->parent = sock; + csock->accept_cb = accept_cb; + csock->accept_cbarg = accept_cbarg; + csock->extrahandlesize = extrahandlesize; + csock->backlog = backlog; + csock->tid = i; /* * We don't attach to quota, just assign - to avoid * increasing quota unnecessarily. 
*/ - nsock->pquota = quota; - } - isc_quota_cb_init(&nsock->quotacb, quota_accept_cb, nsock); + csock->pquota = quota; + isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); - ievent = isc__nm_get_ievent(mgr, netievent_tcplisten); - ievent->sock = nsock; - if (isc__nm_in_netthread()) { - nsock->tid = isc_nm_tid(); - isc__nm_async_tcplisten(&mgr->workers[nsock->tid], - (isc__netievent_t *)ievent); - isc__nm_put_ievent(mgr, ievent); - } else { - nsock->tid = isc_random_uniform(mgr->nworkers); - isc__nm_enqueue_ievent(&mgr->workers[nsock->tid], + result = isc__nm_tcp_lb_socket(sa_family, &csock->fd); + REQUIRE(result == ISC_R_SUCCESS || + result == ISC_R_NOTIMPLEMENTED); + REQUIRE(csock->fd >= 0); + + ievent = isc__nm_get_netievent_tcplisten(mgr, csock); + isc__nm_enqueue_ievent(&mgr->workers[i], (isc__netievent_t *)ievent); - - LOCK(&nsock->lock); - while (!atomic_load(&nsock->listening) && - !atomic_load(&nsock->listen_error)) { - WAIT(&nsock->cond, &nsock->lock); - } - UNLOCK(&nsock->lock); } - if (atomic_load(&nsock->result) == ISC_R_SUCCESS) { - *sockp = nsock; - return (ISC_R_SUCCESS); + LOCK(&sock->lock); + while (sock->rchildren != mgr->nworkers) { + WAIT(&sock->cond, &sock->lock); + } + result = sock->result; + atomic_store(&sock->active, true); + BROADCAST(&sock->scond); + UNLOCK(&sock->lock); + INSIST(result != ISC_R_DEFAULT); + + if (result == ISC_R_SUCCESS) { + REQUIRE(sock->rchildren == mgr->nworkers); + *sockp = sock; } else { - isc_result_t result = atomic_load(&nsock->result); - isc__nmsocket_detach(&nsock); - return (result); + atomic_store(&sock->active, false); + isc__nm_tcp_stoplistening(sock); + isc_nmsocket_close(&sock); } + + return (result); } -/* - * For multi-threaded TCP listening, we create a single socket, - * bind to it, and start listening. On an incoming connection we accept - * it, and then pass the accepted socket using the uv_export/uv_import - * mechanism to a child thread. 
- */ void isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_tcplisten_t *ievent = (isc__netievent_tcplisten_t *)ev0; - isc_nmsocket_t *sock = ievent->sock; - struct sockaddr_storage sname; - int r, flags = 0, snamelen = sizeof(sname); + isc_nmiface_t *iface; sa_family_t sa_family; - uv_os_sock_t fd; + int r; + int flags = 0; + isc_nmsocket_t *sock = NULL; - REQUIRE(isc__nm_in_netthread()); - REQUIRE(sock->type == isc_nm_tcplistener); + REQUIRE(VALID_NMSOCK(ievent->sock)); + REQUIRE(ievent->sock->tid == isc_nm_tid()); + REQUIRE(VALID_NMSOCK(ievent->sock->parent)); + + sock = ievent->sock; + iface = sock->iface; + sa_family = iface->addr.type.sa.sa_family; + + REQUIRE(sock->type == isc_nm_tcpsocket); + REQUIRE(sock->iface != NULL); + REQUIRE(sock->parent != NULL); + REQUIRE(sock->tid == isc_nm_tid()); + + /* TODO: set min mss */ r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp); - if (r != 0) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]); - /* The socket was never opened, so no need for uv_close() */ - atomic_store(&sock->closed, true); - atomic_store(&sock->result, isc__nm_uverr2result(r)); - atomic_store(&sock->listen_error, true); - goto done; - } + RUNTIME_CHECK(r == 0); + uv_handle_set_data(&sock->uv_handle.handle, sock); + /* This keeps the socket alive after everything else is gone */ + isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL }); + r = uv_timer_init(&worker->loop, &sock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&sock->timer, sock); + + r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd); + if (r < 0) { + isc__nm_closesocket(sock->fd); + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]); + goto failure; + } isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]); - sa_family = sock->iface->addr.type.sa.sa_family; if (sa_family == AF_INET6) { flags = UV_TCP_IPV6ONLY; } - uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa, flags); - r = 
uv_fileno(&sock->uv_handle.handle, (uv_os_fd_t *)&fd); - if (r != 0) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface->addr.type.sa, flags); + if (r < 0 && r != UV_EINVAL) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]); - uv_close(&sock->uv_handle.handle, tcp_close_cb); - atomic_store(&sock->result, isc__nm_uverr2result(r)); - atomic_store(&sock->listen_error, true); - goto done; - } - - /* - * uv_tcp_bind() uses a delayed error, initially returning - * success even if bind() fails. By calling uv_tcp_getsockname() - * here we can find out whether the bind() call was successful. - */ - r = uv_tcp_getsockname(&sock->uv_handle.tcp, (struct sockaddr *)&sname, - &snamelen); - - if (r == UV_EADDRINUSE && isc__nm_socket_reuse(fd) == ISC_R_SUCCESS && - isc__nm_socket_reuse_lb(fd) == ISC_R_SUCCESS) - { - /* - * Retry bind() with REUSEADDR/REUSEPORT if the address - * was in use. - */ - uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa, - flags); - r = uv_tcp_getsockname(&sock->uv_handle.tcp, - (struct sockaddr *)&sname, &snamelen); - } - - if (r == UV_EADDRNOTAVAIL && - isc__nm_socket_freebind(fd, sa_family) == ISC_R_SUCCESS) - { - /* - * Retry binding with IP_FREEBIND (or equivalent option) if the - * address is not available. This helps with IPv6 tentative - * addresses which are reported by the route socket, although - * named is not yet able to properly bind to them. - */ - uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa, - flags); - r = uv_tcp_getsockname(&sock->uv_handle.tcp, - (struct sockaddr *)&sname, &snamelen); - } - - if (r != 0) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]); - uv_close(&sock->uv_handle.handle, tcp_close_cb); - atomic_store(&sock->result, isc__nm_uverr2result(r)); - atomic_store(&sock->listen_error, true); - goto done; - } - - /* - * By doing this now, we can find out immediately whether bind() - * failed, and quit if so. 
(uv_bind() uses a delayed error, - * initially returning success even if bind() fails, and this - * could cause a deadlock later if we didn't check first.) - */ - r = uv_tcp_getsockname(&sock->uv_handle.tcp, (struct sockaddr *)&sname, - &snamelen); - if (r != 0) { - uv_close(&sock->uv_handle.handle, tcp_close_cb); - atomic_store(&sock->result, isc__nm_uverr2result(r)); - atomic_store(&sock->listen_error, true); - goto done; + goto failure; } /* @@ -542,36 +535,78 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { */ r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog, tcp_connection_cb); - if (r != 0) { + if (r < 0) { isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR, "uv_listen failed: %s", isc_result_totext(isc__nm_uverr2result(r))); - uv_close(&sock->uv_handle.handle, tcp_close_cb); - atomic_store(&sock->result, isc__nm_uverr2result(r)); - atomic_store(&sock->listen_error, true); - goto done; + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]); + goto failure; } - uv_handle_set_data(&sock->uv_handle.handle, sock); - atomic_store(&sock->listening, true); -done: - LOCK(&sock->lock); - SIGNAL(&sock->cond); - UNLOCK(&sock->lock); + LOCK(&sock->parent->lock); + sock->parent->rchildren += 1; + if (sock->parent->result == ISC_R_DEFAULT) { + sock->parent->result = ISC_R_SUCCESS; + } + SIGNAL(&sock->parent->cond); + if (!atomic_load(&sock->parent->active)) { + WAIT(&sock->parent->scond, &sock->parent->lock); + } + INSIST(atomic_load(&sock->parent->active)); + UNLOCK(&sock->parent->lock); + return; + +failure: + sock->pquota = NULL; + + LOCK(&sock->parent->lock); + sock->parent->rchildren += 1; + if (sock->parent->result == ISC_R_DEFAULT) { + sock->parent->result = isc__nm_uverr2result(r); + } + SIGNAL(&sock->parent->cond); + if (!atomic_load(&sock->parent->active)) { + WAIT(&sock->parent->scond, &sock->parent->lock); + } + INSIST(atomic_load(&sock->parent->active)); + 
UNLOCK(&sock->parent->lock); } static void tcp_connection_cb(uv_stream_t *server, int status) { - isc_nmsocket_t *psock = uv_handle_get_data((uv_handle_t *)server); + isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server); isc_result_t result; + isc_quota_t *quota = NULL; - UNUSED(status); + if (status != 0) { + result = isc__nm_uverr2result(status); + goto done; + } - result = accept_connection(psock, NULL); + REQUIRE(VALID_NMSOCK(ssock)); + REQUIRE(ssock->tid == isc_nm_tid()); + + if (inactive(ssock)) { + result = ISC_R_CANCELED; + goto done; + } + + if (ssock->pquota != NULL) { + result = isc_quota_attach_cb(ssock->pquota, "a, + &ssock->quotacb); + if (result == ISC_R_QUOTA) { + isc__nm_incstats(ssock->mgr, + ssock->statsindex[STATID_ACCEPTFAIL]); + return; + } + } + + result = accept_connection(ssock, quota); +done: if (result != ISC_R_SUCCESS && result != ISC_R_NOCONN) { if ((result != ISC_R_QUOTA && result != ISC_R_SOFTQUOTA) || can_log_tcp_quota()) { @@ -583,98 +618,12 @@ tcp_connection_cb(uv_stream_t *server, int status) { } } -void -isc__nm_async_tcpchildaccept(isc__networker_t *worker, isc__netievent_t *ev0) { - isc__netievent_tcpchildaccept_t *ievent = - (isc__netievent_tcpchildaccept_t *)ev0; - isc_nmsocket_t *sock = ievent->sock; - isc_result_t result; - isc__nm_uvreq_t *req = NULL; - struct sockaddr_storage ss; - isc_sockaddr_t local; - int r; - - REQUIRE(isc__nm_in_netthread()); - REQUIRE(sock->tid == isc_nm_tid()); - - if (!sock->accepting) { - return; - } - - /* Socket was closed midflight by isc__nm_tcp_shutdown() */ - if (!isc__nmsocket_active(sock)) { - failed_accept_cb(sock, ISC_R_CANCELED); - return; - } - - INSIST(sock->server != NULL); - - if (!isc__nmsocket_active(sock->server)) { - failed_accept_cb(sock, ISC_R_CANCELED); - return; - } - - sock->quota = ievent->quota; - ievent->quota = NULL; - - worker = &sock->mgr->workers[isc_nm_tid()]; - uv_tcp_init(&worker->loop, &sock->uv_handle.tcp); - - r = 
isc_uv_import(&sock->uv_handle.stream, &ievent->streaminfo); - if (r != 0) { - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR, - "uv_import failed: %s", - isc_result_totext(isc__nm_uverr2result(r))); - result = isc__nm_uverr2result(r); - goto error; - } - - r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss, - &(int){ sizeof(ss) }); - if (r != 0) { - result = isc__nm_uverr2result(r); - goto error; - } - - result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss); - if (result != ISC_R_SUCCESS) { - goto error; - } - - r = uv_tcp_getsockname(&sock->uv_handle.tcp, (struct sockaddr *)&ss, - &(int){ sizeof(ss) }); - if (r != 0) { - result = isc__nm_uverr2result(r); - goto error; - } - - result = isc_sockaddr_fromsockaddr(&local, (struct sockaddr *)&ss); - if (result != ISC_R_SUCCESS) { - goto error; - } - sock->accepting = false; - - INSIST(sock->accept_cb != NULL); - - sock->read_timeout = sock->mgr->init; - - req = isc__nm_uvreq_get(sock->mgr, sock); - req->handle = isc__nmhandle_get(sock, NULL, &local); - req->cb.accept = sock->accept_cb; - req->cbarg = sock->accept_cbarg; - - isc__nm_acceptcb(sock, req, ISC_R_SUCCESS); - - /* - * sock is now attached to the handle. 
- */ - isc__nmsocket_detach(&sock); - - return; - -error: - failed_accept_cb(sock, result); +static void +enqueue_stoplistening(isc_nmsocket_t *sock) { + isc__netievent_tcpstop_t *ievent = + isc__nm_get_netievent_tcpstop(sock->mgr, sock); + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); } void @@ -682,11 +631,12 @@ isc__nm_tcp_stoplistening(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->type == isc_nm_tcplistener); - isc__netievent_tcpstop_t *ievent = - isc__nm_get_ievent(sock->mgr, netievent_tcpstop); - isc__nmsocket_attach(sock, &ievent->sock); - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); + if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false }, + true)) { + INSIST(0); + ISC_UNREACHABLE(); + } + enqueue_stoplistening(sock); } void @@ -696,69 +646,51 @@ isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0) { UNUSED(worker); - REQUIRE(isc__nm_in_netthread()); REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(sock->type == isc_nm_tcplistener); + REQUIRE(sock->tid == isc_nm_tid()); + + if (sock->parent != NULL) { + stop_tcp_child(sock); + return; + } /* * If network manager is interlocked, re-enqueue the event for later. */ if (!isc__nm_acquire_interlocked(sock->mgr)) { - isc__netievent_tcpstop_t *event = NULL; - - event = isc__nm_get_ievent(sock->mgr, netievent_tcpstop); - event->sock = sock; - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)event); + enqueue_stoplistening(sock); } else { - uv_close((uv_handle_t *)&sock->uv_handle.tcp, - tcp_listenclose_cb); + stop_tcp_parent(sock); isc__nm_drop_interlocked(sock->mgr); } } -/* - * This callback is used for closing listening sockets. 
- */ -static void -tcp_listenclose_cb(uv_handle_t *handle) { - isc_nmsocket_t *sock = uv_handle_get_data(handle); - - LOCK(&sock->lock); - atomic_store(&sock->closed, true); - atomic_store(&sock->listening, false); - sock->pquota = NULL; - UNLOCK(&sock->lock); - - isc__nmsocket_detach(&sock); -} - static void failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->statichandle != NULL); - if (sock->timer_initialized) { - uv_timer_stop(&sock->timer); - sock->timer_running = false; - } + stop_reading(sock); - if (sock->quota) { - isc_quota_detach(&sock->quota); + if (!sock->recv_read) { + goto destroy; } - - uv_read_stop(&sock->uv_handle.stream); + sock->recv_read = false; if (sock->recv_cb != NULL) { - isc__nm_uvreq_t *req = isc__nm_uvreq_get(sock->mgr, sock); - isc_nmhandle_attach(sock->statichandle, &req->handle); - req->cb.recv = sock->recv_cb; - req->cbarg = sock->recv_cbarg; - + isc__nm_uvreq_t *req = get_read_req(sock); isc__nmsocket_clearcb(sock); - isc__nm_readcb(sock, req, result); } + +destroy: + isc__nmsocket_prep_destroy(sock); + + /* We need to detach from quota after the read callback function had a + * chance to be executed. 
*/ + if (sock->quota) { + isc_quota_detach(&sock->quota); + } } static void @@ -774,22 +706,25 @@ failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, } } +static isc__nm_uvreq_t * +get_read_req(isc_nmsocket_t *sock) { + isc__nm_uvreq_t *req = NULL; + + req = isc__nm_uvreq_get(sock->mgr, sock); + req->cb.recv = sock->recv_cb; + req->cbarg = sock->recv_cbarg; + isc_nmhandle_attach(sock->statichandle, &req->handle); + + return req; +} + static void -readtimeout_cb(uv_timer_t *handle) { - isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle); +readtimeout_cb(uv_timer_t *timer) { + isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)timer); REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); - - /* - * Socket is actively processing something, so restart the timer - * and return. - */ - if (atomic_load(&sock->processing)) { - uv_timer_start(handle, readtimeout_cb, sock->read_timeout, 0); - sock->timer_running = true; - return; - } + REQUIRE(sock->reading); /* * Timeout; stop reading and process whatever we have. 
@@ -797,41 +732,80 @@ readtimeout_cb(uv_timer_t *handle) { failed_read_cb(sock, ISC_R_TIMEDOUT); } -void -isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { - isc_nmsocket_t *sock = handle->sock; - isc__netievent_startread_t *ievent = NULL; +static void +start_sock_timer(isc_nmsocket_t *sock) { + if (sock->read_timeout > 0) { + int r = uv_timer_start(&sock->timer, readtimeout_cb, + sock->read_timeout, 0); + REQUIRE(r == 0); + } +} - REQUIRE(VALID_NMHANDLE(handle)); - REQUIRE(VALID_NMSOCK(handle->sock)); +static void +stop_sock_timer(isc_nmsocket_t *sock) { + int r = uv_timer_stop(&sock->timer); + REQUIRE(r == 0); +} - sock->recv_cb = cb; - sock->recv_cbarg = cbarg; - - if (inactive(sock)) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_RECVFAIL]); - failed_read_cb(sock, ISC_R_CANCELED); +static void +start_reading(isc_nmsocket_t *sock) { + if (sock->reading) { return; } - REQUIRE(sock->tid == isc_nm_tid()); + int r = uv_read_start(&sock->uv_handle.stream, tcp_alloc_cb, read_cb); + REQUIRE(r == 0); + sock->reading = true; - sock->read_timeout = (atomic_load(&sock->keepalive) - ? 
sock->mgr->keepalive - : sock->mgr->idle); + start_sock_timer(sock); +} - ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpstartread); - ievent->sock = sock; - - if (sock->tid == isc_nm_tid()) { - isc__nm_async_tcp_startread(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - isc__nm_put_ievent(sock->mgr, ievent); - } else { - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); +static void +stop_reading(isc_nmsocket_t *sock) { + if (!sock->reading) { + return; } + int r = uv_read_stop(&sock->uv_handle.stream); + REQUIRE(r == 0); + sock->reading = false; + + stop_sock_timer(sock); +} + +void +isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + + isc_nmsocket_t *sock = handle->sock; + isc__netievent_tcpstartread_t *ievent = NULL; + + REQUIRE(sock->type == isc_nm_tcpsocket); + REQUIRE(sock->statichandle == handle); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(!sock->recv_read); + + sock->recv_cb = cb; + sock->recv_cbarg = cbarg; + sock->recv_read = true; + if (sock->read_timeout == 0) { + sock->read_timeout = (atomic_load(&sock->keepalive) + ? sock->mgr->keepalive + : sock->mgr->idle); + } + + ievent = isc__nm_get_netievent_tcpstartread(sock->mgr, sock); + + /* + * This MUST be done asynchronously, no matter which thread we're + * in. The callback function for isc_nm_read() often calls + * isc_nm_read() again; if we tried to do that synchronously + * we'd clash in processbuffer() and grow the stack indefinitely. 
+ */ + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); + return; } @@ -849,7 +823,9 @@ tcp_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->type == isc_nm_tcpsocket); REQUIRE(isc__nm_in_netthread()); - REQUIRE(size <= 65536); + if (size > 65536) { + size = 65536; + } worker = &sock->mgr->workers[sock->tid]; INSIST(!worker->recvbuf_inuse); @@ -860,40 +836,31 @@ tcp_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) { } void -isc__nm_async_tcp_startread(isc__networker_t *worker, isc__netievent_t *ev0) { - isc__netievent_startread_t *ievent = (isc__netievent_startread_t *)ev0; +isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tcpstartread_t *ievent = + (isc__netievent_tcpstartread_t *)ev0; isc_nmsocket_t *sock = ievent->sock; - int r; - REQUIRE(worker->id == isc_nm_tid()); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); if (inactive(sock)) { + sock->reading = true; failed_read_cb(sock, ISC_R_CANCELED); return; } - r = uv_read_start(&sock->uv_handle.stream, tcp_alloc_cb, read_cb); - if (r != 0) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_RECVFAIL]); - failed_read_cb(sock, ISC_R_CANCELED); - return; - } - - if (sock->read_timeout != 0) { - if (!sock->timer_initialized) { - uv_timer_init(&worker->loop, &sock->timer); - sock->timer_initialized = true; - } - uv_handle_set_data((uv_handle_t *)&sock->timer, sock); - uv_timer_start(&sock->timer, readtimeout_cb, sock->read_timeout, - 0); - sock->timer_running = true; - } + start_reading(sock); } void -isc__nm_tcp_pauseread(isc_nmsocket_t *sock) { - isc__netievent_pauseread_t *ievent = NULL; +isc__nm_tcp_pauseread(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + + isc__netievent_tcppauseread_t *ievent = NULL; + isc_nmsocket_t *sock = handle->sock; REQUIRE(VALID_NMSOCK(sock)); @@ 
-902,48 +869,44 @@ isc__nm_tcp_pauseread(isc_nmsocket_t *sock) { return; } - ievent = isc__nm_get_ievent(sock->mgr, netievent_tcppauseread); - ievent->sock = sock; + ievent = isc__nm_get_netievent_tcppauseread(sock->mgr, sock); - if (sock->tid == isc_nm_tid()) { - isc__nm_async_tcp_pauseread(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - isc__nm_put_ievent(sock->mgr, ievent); - } else { - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - } + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); return; } void -isc__nm_async_tcp_pauseread(isc__networker_t *worker, isc__netievent_t *ev0) { - isc__netievent_pauseread_t *ievent = (isc__netievent_pauseread_t *)ev0; +isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tcppauseread_t *ievent = + (isc__netievent_tcppauseread_t *)ev0; isc_nmsocket_t *sock = ievent->sock; REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(worker->id == isc_nm_tid()); + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); - if (sock->timer_running) { - uv_timer_stop(&sock->timer); - sock->timer_running = false; - } - uv_read_stop(&sock->uv_handle.stream); + stop_reading(sock); } void -isc__nm_tcp_resumeread(isc_nmsocket_t *sock) { - isc__netievent_startread_t *ievent = NULL; +isc__nm_tcp_resumeread(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + + isc__netievent_tcpstartread_t *ievent = NULL; + isc_nmsocket_t *sock = handle->sock; - REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); if (sock->recv_cb == NULL) { + /* We are no longer reading */ return; } if (!isc__nmsocket_active(sock)) { + sock->reading = true; failed_read_cb(sock, ISC_R_CANCELED); return; } @@ -953,64 +916,63 @@ isc__nm_tcp_resumeread(isc_nmsocket_t *sock) { return; } - ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpstartread); - ievent->sock = sock; + ievent = 
isc__nm_get_netievent_tcpstartread(sock->mgr, sock); - if (sock->tid == isc_nm_tid()) { - isc__nm_async_tcp_startread(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - isc__nm_put_ievent(sock->mgr, ievent); - } else { - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - } + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); } static void read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream); + isc__nm_uvreq_t *req; REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->reading); REQUIRE(buf != NULL); - if (nread >= 0) { - if (sock->recv_cb != NULL) { - isc__nm_uvreq_t *req = isc__nm_uvreq_get(sock->mgr, - sock); - req->cb.recv = sock->recv_cb; - req->cbarg = sock->recv_cbarg; - isc_nmhandle_attach(sock->statichandle, &req->handle); + if (inactive(sock)) { + failed_read_cb(sock, ISC_R_CANCELED); + goto free; + } - /* - * The callback will be called synchronously because the - * result is ISC_R_SUCCESS, so we don't need to retain - * the buffer - */ - req->uvbuf.base = buf->base; - req->uvbuf.len = nread; - - isc__nm_readcb(sock, req, ISC_R_SUCCESS); - } - - if (sock->timer_initialized && sock->read_timeout != 0) { - /* The timer will be updated */ - uv_timer_start(&sock->timer, readtimeout_cb, - sock->read_timeout, 0); - sock->timer_running = true; - } - } else { - /* - * This might happen if the inner socket is closing. It means - * that it's detached, so the socket will be closed. 
- */ + if (nread < 0) { if (nread != UV_EOF) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_RECVFAIL]); } - failed_read_cb(sock, ISC_R_EOF); + failed_read_cb(sock, isc__nm_uverr2result(nread)); + + goto free; } + + req = get_read_req(sock); + + /* + * The callback will be called synchronously because the + * result is ISC_R_SUCCESS, so we don't need to retain + * the buffer + */ + req->uvbuf.base = buf->base; + req->uvbuf.len = nread; + + if (!atomic_load(&sock->client)) { + sock->read_timeout = (atomic_load(&sock->keepalive) + ? sock->mgr->keepalive + : sock->mgr->idle); + } + + isc__nm_readcb(sock, req, ISC_R_SUCCESS); + + /* The readcb could have paused the reading */ + if (sock->reading) { + /* The timer will be updated */ + start_sock_timer(sock); + } + +free: isc__nm_free_uvbuf(sock, buf); } @@ -1024,11 +986,9 @@ quota_accept_cb(isc_quota_t *quota, void *sock0) { /* * Create a tcpaccept event and pass it using the async channel. */ - ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpaccept); - ievent->sock = sock; - ievent->quota = quota; - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); + ievent = isc__nm_get_netievent_tcpaccept(sock->mgr, sock, quota); + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); } /* @@ -1036,12 +996,16 @@ quota_accept_cb(isc_quota_t *quota, void *sock0) { */ void isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0) { - isc_result_t result; isc__netievent_tcpaccept_t *ievent = (isc__netievent_tcpaccept_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + isc_result_t result; - REQUIRE(worker->id == ievent->sock->tid); + UNUSED(worker); - result = accept_connection(ievent->sock, ievent->quota); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + + result = accept_connection(sock, ievent->quota); if (result != ISC_R_SUCCESS && result != ISC_R_NOCONN) { if ((result != ISC_R_QUOTA && result != ISC_R_SOFTQUOTA) || 
can_log_tcp_quota()) { @@ -1051,134 +1015,126 @@ isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0) { isc_result_totext(result)); } } - - /* - * The socket was attached just before we called isc_quota_attach_cb(). - */ - isc__nmsocket_detach(&ievent->sock); -} - -/* - * Close callback for uv_tcp_t strutures created in accept_connection(). - */ -static void -free_uvtcpt(uv_handle_t *uvs) { - isc_mem_t *mctx = (isc_mem_t *)uv_handle_get_data(uvs); - isc_mem_putanddetach(&mctx, uvs, sizeof(uv_tcp_t)); } static isc_result_t accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) { - isc_result_t result; - isc__netievent_tcpchildaccept_t *event = NULL; - isc__networker_t *worker = NULL; - uv_tcp_t *uvstream = NULL; isc_nmsocket_t *csock = NULL; - isc_mem_t *mctx = NULL; - int r, w; + isc__networker_t *worker = NULL; + int r; + isc_result_t result; + struct sockaddr_storage ss; + isc_sockaddr_t local; + isc_nmhandle_t *handle; REQUIRE(VALID_NMSOCK(ssock)); + REQUIRE(ssock->tid == isc_nm_tid()); - if (!isc__nmsocket_active(ssock) || atomic_load(&ssock->mgr->closing)) { - /* We're closing, bail */ + if (inactive(ssock)) { if (quota != NULL) { isc_quota_detach("a); } return (ISC_R_CANCELED); } - /* We can be called directly or as a callback from quota */ - if (ssock->pquota != NULL && quota == NULL) { - /* - * We need to attach to ssock, because it might be queued - * waiting for a TCP quota slot. If so, then we'll detach it - * later when the connection is accepted. (XXX: This may be - * suboptimal, it might be better not to attach unless - * we need to - but we risk a race then.) - */ - isc_nmsocket_t *tsock = NULL; - isc__nmsocket_attach(ssock, &tsock); - result = isc_quota_attach_cb(ssock->pquota, "a, - &ssock->quotacb); - if (result == ISC_R_QUOTA) { - isc__nm_incstats(ssock->mgr, - ssock->statsindex[STATID_ACCEPTFAIL]); - return (result); - } - - /* - * We're under quota, so there's no need to wait; - * Detach the socket. 
- */ - isc__nmsocket_detach(&tsock); - } - - isc__nm_incstats(ssock->mgr, ssock->statsindex[STATID_ACCEPT]); - - worker = &ssock->mgr->workers[isc_nm_tid()]; - uvstream = isc_mem_get(ssock->mgr->mctx, sizeof(uv_tcp_t)); - - isc_mem_attach(ssock->mgr->mctx, &mctx); - uv_handle_set_data((uv_handle_t *)uvstream, mctx); - mctx = NULL; /* Detached later in free_uvtcpt() */ - - uv_tcp_init(&worker->loop, uvstream); - - r = uv_accept(&ssock->uv_handle.stream, (uv_stream_t *)uvstream); - if (r != 0) { - result = isc__nm_uverr2result(r); - uv_close((uv_handle_t *)uvstream, free_uvtcpt); - if (quota != NULL) { - isc_quota_detach("a); - } - return (result); - } - - /* We have an accepted TCP socket, pass it to a random worker */ - w = isc_random_uniform(ssock->mgr->nworkers); - event = isc__nm_get_ievent(ssock->mgr, netievent_tcpchildaccept); - - /* Duplicate the server socket */ - r = isc_uv_export((uv_stream_t *)uvstream, &event->streaminfo); - if (r != 0) { - result = isc_errno_toresult(errno); - uv_close((uv_handle_t *)uvstream, free_uvtcpt); - if (quota != NULL) { - isc_quota_detach("a); - } - isc__nm_put_ievent(ssock->mgr, event); - return (result); - } - csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t)); isc__nmsocket_init(csock, ssock->mgr, isc_nm_tcpsocket, ssock->iface); - csock->tid = w; + csock->tid = ssock->tid; csock->extrahandlesize = ssock->extrahandlesize; isc__nmsocket_attach(ssock, &csock->server); - csock->accept_cb = ssock->accept_cb; - csock->accept_cbarg = ssock->accept_cbarg; + csock->recv_cb = ssock->recv_cb; + csock->recv_cbarg = ssock->recv_cbarg; + csock->quota = quota; csock->accepting = true; - event->sock = csock; - event->quota = quota; + worker = &csock->mgr->workers[isc_nm_tid()]; - uv_close((uv_handle_t *)uvstream, free_uvtcpt); + r = uv_tcp_init(&worker->loop, &csock->uv_handle.tcp); + RUNTIME_CHECK(r == 0); + uv_handle_set_data(&csock->uv_handle.handle, csock); - if (w == isc_nm_tid()) { - 
isc__nm_async_tcpchildaccept(&ssock->mgr->workers[w], - (isc__netievent_t *)event); - isc__nm_put_ievent(ssock->mgr, event); - } else { - isc__nm_enqueue_ievent(&ssock->mgr->workers[w], - (isc__netievent_t *)event); + r = uv_timer_init(&worker->loop, &csock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&csock->timer, csock); + + r = uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream); + if (r != 0) { + result = isc__nm_uverr2result(r); + goto failure; } + r = uv_tcp_getpeername(&csock->uv_handle.tcp, (struct sockaddr *)&ss, + &(int){ sizeof(ss) }); + if (r != 0) { + result = isc__nm_uverr2result(r); + goto failure; + } + + result = isc_sockaddr_fromsockaddr(&csock->peer, + (struct sockaddr *)&ss); + if (result != ISC_R_SUCCESS) { + goto failure; + } + + r = uv_tcp_getsockname(&csock->uv_handle.tcp, (struct sockaddr *)&ss, + &(int){ sizeof(ss) }); + if (r != 0) { + result = isc__nm_uverr2result(r); + goto failure; + } + + result = isc_sockaddr_fromsockaddr(&local, (struct sockaddr *)&ss); + if (result != ISC_R_SUCCESS) { + goto failure; + } + + handle = isc__nmhandle_get(csock, NULL, &local); + + result = ssock->accept_cb(handle, ISC_R_SUCCESS, ssock->accept_cbarg); + if (result != ISC_R_SUCCESS) { + isc_nmhandle_detach(&handle); + goto failure; + } + + csock->accepting = false; + + isc__nm_incstats(csock->mgr, csock->statsindex[STATID_ACCEPT]); + + csock->read_timeout = csock->mgr->init; + + atomic_fetch_add(&ssock->parent->active_child_connections, 1); + + /* + * The acceptcb needs to attach to the handle if it wants to keep the + * connection alive + */ + isc_nmhandle_detach(&handle); + + /* + * sock is now attached to the handle. 
+ */ + isc__nmsocket_detach(&csock); + return (ISC_R_SUCCESS); + +failure: + atomic_store(&csock->active, false); + + failed_accept_cb(csock, result); + + isc__nmsocket_prep_destroy(csock); + + isc__nmsocket_detach(&csock); + + return (result); } void isc__nm_tcp_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, void *cbarg) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + isc_nmsocket_t *sock = handle->sock; isc__netievent_tcpsend_t *ievent = NULL; isc__nm_uvreq_t *uvreq = NULL; @@ -1194,40 +1150,15 @@ isc__nm_tcp_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, uvreq->cb.send = cb; uvreq->cbarg = cbarg; - if (inactive(sock)) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]); - failed_send_cb(sock, uvreq, ISC_R_CANCELED); - return; - } + ievent = isc__nm_get_netievent_tcpsend(sock->mgr, sock, uvreq); + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); - if (sock->tid == isc_nm_tid()) { - /* - * If we're in the same thread as the socket we can send the - * data directly - */ - isc_result_t result = tcp_send_direct(sock, uvreq); - if (result != ISC_R_SUCCESS) { - isc__nm_incstats(sock->mgr, - sock->statsindex[STATID_SENDFAIL]); - failed_send_cb(sock, uvreq, result); - } - } else { - /* - * We need to create an event and pass it using async channel - */ - ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpsend); - ievent->sock = sock; - ievent->req = uvreq; - - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - } return; } static void tcp_send_cb(uv_write_t *req, int status) { - isc_result_t result = ISC_R_SUCCESS; isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data; isc_nmsocket_t *sock = uvreq->sock; @@ -1235,12 +1166,12 @@ tcp_send_cb(uv_write_t *req, int status) { REQUIRE(VALID_NMHANDLE(uvreq->handle)); if (status < 0) { - result = isc__nm_uverr2result(status); isc__nm_incstats(sock->mgr, 
sock->statsindex[STATID_SENDFAIL]); + failed_send_cb(sock, uvreq, isc__nm_uverr2result(status)); + return; } - uvreq->cb.send(uvreq->handle, result, uvreq->cbarg); - isc__nm_uvreq_put(&uvreq, sock); + isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS); } /* @@ -1254,25 +1185,25 @@ isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_uvreq_t *uvreq = ievent->req; REQUIRE(sock->type == isc_nm_tcpsocket); - REQUIRE(worker->id == ievent->sock->tid); + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); result = tcp_send_direct(sock, uvreq); if (result != ISC_R_SUCCESS) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]); - uvreq->cb.send(uvreq->handle, result, uvreq->cbarg); - isc__nm_uvreq_put(&uvreq, sock); + failed_send_cb(sock, uvreq, result); } } static isc_result_t tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { - int r; - REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_UVREQ(req)); REQUIRE(sock->tid == isc_nm_tid()); REQUIRE(sock->type == isc_nm_tcpsocket); + int r; + if (inactive(sock)) { return (ISC_R_CANCELED); } @@ -1287,59 +1218,137 @@ tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { } static void -tcp_close_cb(uv_handle_t *uvhandle) { - isc_nmsocket_t *sock = uv_handle_get_data(uvhandle); +tcp_stop_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = uv_handle_get_data(handle); + uv_handle_set_data(handle, NULL); REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->closing)); + + if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false }, + true)) { + INSIST(0); + ISC_UNREACHABLE(); + } + + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]); + + atomic_store(&sock->listening, false); + + isc__nmsocket_detach(&sock); +} + +static void +tcp_close_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = uv_handle_get_data(handle); + uv_handle_set_data(handle, NULL); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + 
REQUIRE(atomic_load(&sock->closing)); + + if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false }, + true)) { + INSIST(0); + ISC_UNREACHABLE(); + } isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]); - atomic_store(&sock->closed, true); - atomic_store(&sock->connected, false); if (sock->server != NULL) { isc__nmsocket_detach(&sock->server); } + atomic_store(&sock->connected, false); + isc__nmsocket_prep_destroy(sock); } static void -timer_close_cb(uv_handle_t *uvhandle) { - uv_handle_t *handle = uv_handle_get_data(uvhandle); +timer_close_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = uv_handle_get_data(handle); + uv_handle_set_data(handle, NULL); - uv_close(handle, tcp_close_cb); + if (sock->parent) { + uv_close(&sock->uv_handle.handle, tcp_stop_cb); + } else { + uv_close(&sock->uv_handle.handle, tcp_close_cb); + } +} + +static void +stop_tcp_child(isc_nmsocket_t *sock) { + REQUIRE(sock->type == isc_nm_tcpsocket); + REQUIRE(sock->tid == isc_nm_tid()); + + if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false }, + true)) { + return; + } + + tcp_close_direct(sock); + + LOCK(&sock->parent->lock); + sock->parent->rchildren -= 1; + UNLOCK(&sock->parent->lock); + BROADCAST(&sock->parent->cond); +} + +static void +stop_tcp_parent(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tcplistener); + + for (int i = 0; i < sock->nchildren; i++) { + isc__netievent_tcpstop_t *ievent = NULL; + isc_nmsocket_t *csock = &sock->children[i]; + REQUIRE(VALID_NMSOCK(csock)); + + atomic_store(&csock->active, false); + + if (csock->tid == isc_nm_tid()) { + stop_tcp_child(csock); + continue; + } + + ievent = isc__nm_get_netievent_tcpstop(sock->mgr, csock); + isc__nm_enqueue_ievent(&sock->mgr->workers[csock->tid], + (isc__netievent_t *)ievent); + } + + LOCK(&sock->lock); + while (sock->rchildren > 0) { + WAIT(&sock->cond, &sock->lock); + } + atomic_store(&sock->closed, true); + UNLOCK(&sock->lock); + + 
isc__nmsocket_prep_destroy(sock); } static void tcp_close_direct(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); - REQUIRE(sock->type == isc_nm_tcpsocket); + REQUIRE(atomic_load(&sock->closing)); + + if (sock->server != NULL) { + REQUIRE(VALID_NMSOCK(sock->server)); + REQUIRE(VALID_NMSOCK(sock->server->parent)); + if (sock->server->parent != NULL) { + atomic_fetch_sub( + &sock->server->parent->active_child_connections, + 1); + } + } if (sock->quota != NULL) { isc_quota_detach(&sock->quota); } - uv_read_stop((uv_stream_t *)&sock->uv_handle.handle); - - if (sock->timer_running) { - uv_timer_stop(&sock->timer); - sock->timer_running = false; - } - - if (sock->timer_initialized) { - sock->timer_initialized = false; - /* - * The read and timer is stopped and the socket will be - * scheduled to be closed, so we can override the data that the - * timer handle holds. - */ - uv_handle_set_data((uv_handle_t *)&sock->timer, - &sock->uv_handle.handle); - uv_close((uv_handle_t *)&sock->timer, timer_close_cb); - } else { - uv_close(&sock->uv_handle.handle, tcp_close_cb); - } + stop_reading(sock); + uv_close((uv_handle_t *)&sock->timer, timer_close_cb); } void @@ -1360,9 +1369,8 @@ isc__nm_tcp_close(isc_nmsocket_t *sock) { * We need to create an event and pass it using async channel */ isc__netievent_tcpclose_t *ievent = - isc__nm_get_ievent(sock->mgr, netievent_tcpclose); + isc__nm_get_netievent_tcpclose(sock->mgr, sock); - ievent->sock = sock; isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -1385,14 +1393,7 @@ void isc__nm_tcp_shutdown(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); - - if (sock->type != isc_nm_tcpsocket) { - return; - } - - if (atomic_load(&sock->connecting)) { - return; - } + REQUIRE(sock->type == isc_nm_tcpsocket); /* * If the socket is active, mark it inactive and @@ -1402,13 +1403,20 @@ isc__nm_tcp_shutdown(isc_nmsocket_t *sock) { 
return; } - if (sock->accepting) { - failed_accept_cb(sock, ISC_R_CANCELED); + if (atomic_load(&sock->connecting) || sock->accepting) { return; } - if (sock->statichandle != NULL) { + if (sock->statichandle) { failed_read_cb(sock, ISC_R_CANCELED); + return; + } + + /* + * Otherwise, we just send the socket to abyss... + */ + if (sock->parent == NULL) { + isc__nmsocket_prep_destroy(sock); } } @@ -1424,9 +1432,7 @@ isc__nm_tcp_cancelread(isc_nmhandle_t *handle) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->type == isc_nm_tcpsocket); - ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpcancel); - ievent->sock = sock; - isc_nmhandle_attach(handle, &ievent->handle); + ievent = isc__nm_get_netievent_tcpcancel(sock->mgr, sock, handle); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -1435,19 +1441,14 @@ void isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_tcpcancel_t *ievent = (isc__netievent_tcpcancel_t *)ev0; isc_nmsocket_t *sock = ievent->sock; - isc_nmhandle_t *handle = ievent->handle; REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(worker->id == sock->tid); REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); - uv_read_stop(&sock->uv_handle.stream); + uv_timer_stop(&sock->timer); - if (atomic_load(&sock->client)) { - failed_read_cb(sock, ISC_R_EOF); - } - - isc_nmhandle_detach(&handle); + failed_read_cb(sock, ISC_R_EOF); } void @@ -1459,8 +1460,18 @@ isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout) { sock = handle->sock; sock->read_timeout = timeout; - if (sock->timer_running) { - uv_timer_start(&sock->timer, readtimeout_cb, sock->read_timeout, - 0); + if (uv_is_active((uv_handle_t *)&sock->timer)) { + start_sock_timer(sock); } } + +int_fast32_t +isc__nm_tcp_listener_nactive(isc_nmsocket_t *listener) { + int_fast32_t nactive; + + REQUIRE(VALID_NMSOCK(listener)); + + nactive = atomic_load(&listener->active_child_connections); + INSIST(nactive >= 0); + return nactive; +} diff 
--git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index f742c64a65..8cc0c4a654 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -9,20 +9,25 @@ * information regarding copyright ownership. */ +#include #include #include #include #include #include +#include +#include #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -37,20 +42,76 @@ * changed in the future. */ +static atomic_uint_fast32_t last_tcpdnsquota_log = ATOMIC_VAR_INIT(0); + +static bool +can_log_tcpdns_quota(void) { + isc_stdtime_t now, last; + + isc_stdtime_get(&now); + last = atomic_exchange_relaxed(&last_tcpdnsquota_log, now); + if (now != last) { + return (true); + } + + return (false); +} + static void -dnslisten_readcb(isc_nmhandle_t *handle, isc_result_t eresult, - isc_region_t *region, void *arg); +tcpdns_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf); static void resume_processing(void *arg); +static isc_result_t +tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req); + static void tcpdns_close_direct(isc_nmsocket_t *sock); -static inline size_t -dnslen(unsigned char *base) { - return ((base[0] << 8) + (base[1])); -} +static isc_result_t +tcpdns_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req); +static void +tcpdns_connect_cb(uv_connect_t *uvreq, int status); + +static void +tcpdns_connection_cb(uv_stream_t *server, int status); + +static void +read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf); + +static void +tcpdns_close_cb(uv_handle_t *uvhandle); + +static isc_result_t +accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota); + +static void +quota_accept_cb(isc_quota_t *quota, void *sock0); + +static void +failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult); + +static void +failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, + isc_result_t eresult); + +static void +stop_tcpdns_parent(isc_nmsocket_t *sock); +static void 
+stop_tcpdns_child(isc_nmsocket_t *sock); + +static void +start_sock_timer(isc_nmsocket_t *sock); + +static void +process_sock_buffer(isc_nmsocket_t *sock); + +static void +stop_reading(isc_nmsocket_t *sock); + +static isc__nm_uvreq_t * +get_read_req(isc_nmsocket_t *sock); /* * Regular TCP buffer, should suffice in most cases. @@ -79,93 +140,772 @@ alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) { } } -static void -timer_close_cb(uv_handle_t *handle) { - isc_nmsocket_t *sock = (isc_nmsocket_t *)uv_handle_get_data(handle); - - REQUIRE(VALID_NMSOCK(sock)); - - atomic_store(&sock->closed, true); - tcpdns_close_direct(sock); +static bool +inactive(isc_nmsocket_t *sock) { + return (!isc__nmsocket_active(sock) || atomic_load(&sock->closing) || + atomic_load(&sock->mgr->closing) || + (sock->server != NULL && !isc__nmsocket_active(sock->server))); } static void -dnstcp_readtimeout(uv_timer_t *timer) { - isc_nmsocket_t *sock = - (isc_nmsocket_t *)uv_handle_get_data((uv_handle_t *)timer); +failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult) { + REQUIRE(sock->accepting); + REQUIRE(sock->server); - REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(sock->tid == isc_nm_tid()); + /* + * Detach the quota early to make room for other connections; + * otherwise it'd be detached later asynchronously, and clog + * the quota unnecessarily. + */ + if (sock->quota != NULL) { + isc_quota_detach(&sock->quota); + } - /* Close the TCP connection; its closure should fire ours. */ - if (sock->outerhandle != NULL) { - isc_nmhandle_detach(&sock->outerhandle); + isc__nmsocket_detach(&sock->server); + + sock->accepting = false; + + switch (eresult) { + case ISC_R_NOTCONNECTED: + /* IGNORE: The client disconnected before we could accept */ + break; + default: + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR, + "Accepting TCP connection failed: %s", + isc_result_totext(eresult)); } } -/* - * Accept callback for TCP-DNS connection. 
- */ +static void +failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, + isc_result_t eresult) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(VALID_UVREQ(req)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->connecting)); + REQUIRE(req->cb.connect != NULL); + + atomic_store(&sock->connecting, false); + + isc__nmsocket_clearcb(sock); + isc__nm_connectcb(sock, req, eresult); + + isc__nmsocket_prep_destroy(sock); +} + static isc_result_t -dnslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { - isc_nmsocket_t *dnslistensock = (isc_nmsocket_t *)cbarg; - isc_nmsocket_t *dnssock = NULL; - isc_nmhandle_t *readhandle = NULL; - isc_nm_accept_cb_t accept_cb; - void *accept_cbarg; +tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { + isc__networker_t *worker = NULL; + int r; - REQUIRE(VALID_NMSOCK(dnslistensock)); - REQUIRE(dnslistensock->type == isc_nm_tcpdnslistener); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(VALID_UVREQ(req)); + REQUIRE(isc__nm_in_netthread()); + REQUIRE(sock->tid == isc_nm_tid()); + + worker = &sock->mgr->workers[sock->tid]; + + atomic_store(&sock->connecting, true); + + r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp); + RUNTIME_CHECK(r == 0); + uv_handle_set_data(&sock->uv_handle.handle, sock); + + r = uv_timer_init(&worker->loop, &sock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&sock->timer, sock); + + r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd); + if (r != 0) { + isc__nm_closesocket(sock->fd); + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]); + goto failure; + } + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]); + + if (req->local.length != 0) { + r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0); + /* + * In case of shared socket UV_EINVAL will be returned and needs + * to be ignored + */ + if (r != 0 && r != UV_EINVAL) { + isc__nm_incstats(sock->mgr, + sock->statsindex[STATID_BINDFAIL]); + goto failure; + } + } 
+ + uv_handle_set_data(&req->uv_req.handle, req); + r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, + &req->peer.type.sa, tcpdns_connect_cb); + if (r != 0) { + isc__nm_incstats(sock->mgr, + sock->statsindex[STATID_CONNECTFAIL]); + goto failure; + } + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]); + + atomic_store(&sock->connected, true); + + return (ISC_R_SUCCESS); + +failure: + atomic_store(&sock->active, false); + + isc__nm_tcpdns_close(sock); + + return (isc__nm_uverr2result(r)); +} + +void +isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tcpdnsconnect_t *ievent = + (isc__netievent_tcpdnsconnect_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + isc__nm_uvreq_t *req = ievent->req; + isc_result_t result = ISC_R_SUCCESS; + + UNUSED(worker); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tcpdnssocket); + REQUIRE(sock->iface != NULL); + REQUIRE(sock->parent == NULL); + REQUIRE(sock->tid == isc_nm_tid()); + + result = tcpdns_connect_direct(sock, req); + if (result == ISC_R_SUCCESS) { + atomic_store(&sock->connected, true); + /* The connect cb will be executed in tcpdns_connect_cb() */ + } else { + isc__nm_uvreq_put(&req, sock); + } + + LOCK(&sock->lock); + sock->result = result; + SIGNAL(&sock->cond); + if (!atomic_load(&sock->active)) { + WAIT(&sock->scond, &sock->lock); + } + INSIST(atomic_load(&sock->active)); + UNLOCK(&sock->lock); + + /* + * The sock is now attached to the handle. 
+ */ + isc__nmsocket_detach(&sock); +} + +static void +tcpdns_connect_cb(uv_connect_t *uvreq, int status) { + isc_result_t result; + isc__nm_uvreq_t *req = NULL; + isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle); + struct sockaddr_storage ss; + int r; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->connecting)); + + req = uv_handle_get_data((uv_handle_t *)uvreq); + + REQUIRE(VALID_UVREQ(req)); + REQUIRE(VALID_NMHANDLE(req->handle)); + + /* Socket was closed midflight by isc__nm_tcpdns_shutdown() */ + if (!isc__nmsocket_active(sock)) { + result = ISC_R_CANCELED; + goto error; + } + + if (status != 0) { + result = isc__nm_uverr2result(status); + goto error; + } + + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]); + r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss, + &(int){ sizeof(ss) }); + if (r != 0) { + result = isc__nm_uverr2result(r); + goto error; + } + + atomic_store(&sock->connecting, false); + + result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + + isc__nm_connectcb(sock, req, ISC_R_SUCCESS); + + return; + +error: + failed_connect_cb(sock, req, result); +} + +isc_result_t +isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, + isc_nm_cb_t cb, void *cbarg, unsigned int timeout, + size_t extrahandlesize) { + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *sock = NULL; + isc__netievent_tcpdnsconnect_t *ievent = NULL; + isc__nm_uvreq_t *req = NULL; + sa_family_t sa_family; + uv_os_sock_t fd; + + REQUIRE(VALID_NM(mgr)); + REQUIRE(local != NULL); + REQUIRE(peer != NULL); + + sa_family = peer->addr.type.sa.sa_family; + + /* + * The socket() call can fail spuriously on FreeBSD 12, so we need to + * handle the failure early and gracefully. 
+ */ + result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &fd); if (result != ISC_R_SUCCESS) { return (result); } - accept_cb = dnslistensock->accept_cb; - accept_cbarg = dnslistensock->accept_cbarg; + sock = isc_mem_get(mgr->mctx, sizeof(*sock)); + isc__nmsocket_init(sock, mgr, isc_nm_tcpdnssocket, local); - if (accept_cb != NULL) { - result = accept_cb(handle, ISC_R_SUCCESS, accept_cbarg); - if (result != ISC_R_SUCCESS) { - return (result); + atomic_init(&sock->active, false); + sock->extrahandlesize = extrahandlesize; + sock->connect_timeout = timeout; + sock->result = ISC_R_DEFAULT; + sock->fd = fd; + atomic_init(&sock->client, true); + + result = isc__nm_socket_connectiontimeout(fd, timeout); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + + req = isc__nm_uvreq_get(mgr, sock); + req->cb.connect = cb; + req->cbarg = cbarg; + req->peer = peer->addr; + req->local = local->addr; + req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface->addr); + + ievent = isc__nm_get_netievent_tcpdnsconnect(mgr, sock, req); + + if (isc__nm_in_netthread()) { + atomic_store(&sock->active, true); + sock->tid = isc_nm_tid(); + isc__nm_async_tcpdnsconnect(&mgr->workers[sock->tid], + (isc__netievent_t *)ievent); + isc__nm_put_netievent_tcpdnsconnect(mgr, ievent); + } else { + sock->tid = isc_random_uniform(mgr->nworkers); + isc__nm_enqueue_ievent(&mgr->workers[sock->tid], + (isc__netievent_t *)ievent); + } + LOCK(&sock->lock); + result = sock->result; + while (result == ISC_R_DEFAULT) { + WAIT(&sock->cond, &sock->lock); + result = sock->result; + } + atomic_store(&sock->active, true); + BROADCAST(&sock->scond); + UNLOCK(&sock->lock); + INSIST(result != ISC_R_DEFAULT); + + return (result); +} + +static isc_result_t +isc__nm_tcpdns_lb_socket(sa_family_t sa_family, uv_os_sock_t *sockp) { + isc_result_t result; + uv_os_sock_t sock; + + result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock); + REQUIRE(result == ISC_R_SUCCESS); + + (void)isc__nm_socket_incoming_cpu(sock); + + /* FIXME: 
set mss */ + + result = isc__nm_socket_reuse(sock); + REQUIRE(result == ISC_R_SUCCESS || result == ISC_R_NOTIMPLEMENTED); + + result = isc__nm_socket_reuse_lb(sock); + REQUIRE(result == ISC_R_SUCCESS || result == ISC_R_NOTIMPLEMENTED); + + *sockp = sock; + + return (result); +} + +isc_result_t +isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, + isc_nm_recv_cb_t recv_cb, void *recv_cbarg, + isc_nm_accept_cb_t accept_cb, void *accept_cbarg, + size_t extrahandlesize, int backlog, isc_quota_t *quota, + isc_nmsocket_t **sockp) { + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *sock = NULL; + sa_family_t sa_family = iface->addr.type.sa.sa_family; + size_t children_size = 0; + + REQUIRE(VALID_NM(mgr)); + + sock = isc_mem_get(mgr->mctx, sizeof(*sock)); + isc__nmsocket_init(sock, mgr, isc_nm_tcpdnslistener, iface); + + sock->rchildren = 0; + sock->nchildren = mgr->nworkers; + children_size = sock->nchildren * sizeof(sock->children[0]); + sock->children = isc_mem_get(mgr->mctx, children_size); + memset(sock->children, 0, children_size); + + sock->result = ISC_R_DEFAULT; + sock->tid = isc_random_uniform(mgr->nworkers); + sock->fd = -1; + + for (size_t i = 0; i < mgr->nworkers; i++) { + isc__netievent_tcpdnslisten_t *ievent = NULL; + isc_nmsocket_t *csock = &sock->children[i]; + + isc__nmsocket_init(csock, mgr, isc_nm_tcpdnssocket, iface); + csock->parent = sock; + csock->accept_cb = accept_cb; + csock->accept_cbarg = accept_cbarg; + csock->recv_cb = recv_cb; + csock->recv_cbarg = recv_cbarg; + csock->extrahandlesize = extrahandlesize; + csock->backlog = backlog; + csock->tid = i; + /* + * We don't attach to quota, just assign - to avoid + * increasing quota unnecessarily. 
+ */ + csock->pquota = quota; + isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); + + result = isc__nm_tcpdns_lb_socket(sa_family, &csock->fd); + REQUIRE(result == ISC_R_SUCCESS || + result == ISC_R_NOTIMPLEMENTED); + REQUIRE(csock->fd >= 0); + + ievent = isc__nm_get_netievent_tcpdnslisten(mgr, csock); + isc__nm_enqueue_ievent(&mgr->workers[i], + (isc__netievent_t *)ievent); + } + + LOCK(&sock->lock); + while (sock->rchildren != mgr->nworkers) { + WAIT(&sock->cond, &sock->lock); + } + result = sock->result; + atomic_store(&sock->active, true); + BROADCAST(&sock->scond); + UNLOCK(&sock->lock); + INSIST(result != ISC_R_DEFAULT); + + if (result == ISC_R_SUCCESS) { + REQUIRE(sock->rchildren == mgr->nworkers); + *sockp = sock; + } else { + atomic_store(&sock->active, false); + isc__nm_tcpdns_stoplistening(sock); + isc_nmsocket_close(&sock); + } + + return (result); +} + +void +isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tcpdnslisten_t *ievent = + (isc__netievent_tcpdnslisten_t *)ev0; + isc_nmiface_t *iface; + sa_family_t sa_family; + int r; + int flags = 0; + isc_nmsocket_t *sock = NULL; + + REQUIRE(VALID_NMSOCK(ievent->sock)); + REQUIRE(ievent->sock->tid == isc_nm_tid()); + REQUIRE(VALID_NMSOCK(ievent->sock->parent)); + + sock = ievent->sock; + iface = sock->iface; + sa_family = iface->addr.type.sa.sa_family; + + REQUIRE(sock->type == isc_nm_tcpdnssocket); + REQUIRE(sock->iface != NULL); + REQUIRE(sock->parent != NULL); + REQUIRE(sock->tid == isc_nm_tid()); + + /* TODO: set min mss */ + + r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp); + RUNTIME_CHECK(r == 0); + uv_handle_set_data(&sock->uv_handle.handle, sock); + /* This keeps the socket alive after everything else is gone */ + isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL }); + + r = uv_timer_init(&worker->loop, &sock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&sock->timer, sock); + + r = 
uv_tcp_open(&sock->uv_handle.tcp, sock->fd); + if (r < 0) { + isc__nm_closesocket(sock->fd); + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]); + goto failure; + } + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]); + + if (sa_family == AF_INET6) { + flags = UV_TCP_IPV6ONLY; + } + + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface->addr.type.sa, flags); + if (r < 0 && r != UV_EINVAL) { + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]); + goto failure; + } + + /* + * The callback will run in the same thread uv_listen() was called + * from, so a race with tcpdns_connection_cb() isn't possible. + */ + r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog, + tcpdns_connection_cb); + if (r < 0) { + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR, + "uv_listen failed: %s", + isc_result_totext(isc__nm_uverr2result(r))); + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]); + goto failure; + } + + atomic_store(&sock->listening, true); + + LOCK(&sock->parent->lock); + sock->parent->rchildren += 1; + if (sock->parent->result == ISC_R_DEFAULT) { + sock->parent->result = ISC_R_SUCCESS; + } + SIGNAL(&sock->parent->cond); + if (!atomic_load(&sock->parent->active)) { + WAIT(&sock->parent->scond, &sock->parent->lock); + } + INSIST(atomic_load(&sock->parent->active)); + UNLOCK(&sock->parent->lock); + + return; + +failure: + sock->pquota = NULL; + + LOCK(&sock->parent->lock); + sock->parent->rchildren += 1; + if (sock->parent->result == ISC_R_DEFAULT) { + sock->parent->result = isc__nm_uverr2result(r); + } + SIGNAL(&sock->parent->cond); + if (!atomic_load(&sock->parent->active)) { + WAIT(&sock->parent->scond, &sock->parent->lock); + } + INSIST(atomic_load(&sock->parent->active)); + UNLOCK(&sock->parent->lock); +} + +static void +tcpdns_connection_cb(uv_stream_t *server, int status) { + isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server); + isc_result_t 
result; + isc_quota_t *quota = NULL; + + if (status != 0) { + result = isc__nm_uverr2result(status); + goto done; + } + + REQUIRE(VALID_NMSOCK(ssock)); + REQUIRE(ssock->tid == isc_nm_tid()); + + if (inactive(ssock)) { + result = ISC_R_CANCELED; + goto done; + } + + if (ssock->pquota != NULL) { + result = isc_quota_attach_cb(ssock->pquota, "a, + &ssock->quotacb); + if (result == ISC_R_QUOTA) { + isc__nm_incstats(ssock->mgr, + ssock->statsindex[STATID_ACCEPTFAIL]); + return; } } - /* We need to create a 'wrapper' dnssocket for this connection */ - dnssock = isc_mem_get(handle->sock->mgr->mctx, sizeof(*dnssock)); - isc__nmsocket_init(dnssock, handle->sock->mgr, isc_nm_tcpdnssocket, - handle->sock->iface); + result = accept_connection(ssock, quota); +done: + if (result != ISC_R_SUCCESS && result != ISC_R_NOCONN) { + if ((result != ISC_R_QUOTA && result != ISC_R_SOFTQUOTA) || + can_log_tcpdns_quota()) + { + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR, + "TCP connection failed: %s", + isc_result_totext(result)); + } + } +} - dnssock->extrahandlesize = dnslistensock->extrahandlesize; - isc__nmsocket_attach(dnslistensock, &dnssock->listener); +static void +enqueue_stoplistening(isc_nmsocket_t *sock) { + isc__netievent_tcpdnsstop_t *ievent = + isc__nm_get_netievent_tcpdnsstop(sock->mgr, sock); + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); +} - isc__nmsocket_attach(dnssock, &dnssock->self); +void +isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tcpdnslistener); - isc_nmhandle_attach(handle, &dnssock->outerhandle); + if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false }, + true)) { + INSIST(0); + ISC_UNREACHABLE(); + } + enqueue_stoplistening(sock); +} - dnssock->peer = handle->sock->peer; - dnssock->read_timeout = handle->sock->mgr->init; - dnssock->tid = isc_nm_tid(); - dnssock->closehandle_cb = 
resume_processing; +void +isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tcpdnsstop_t *ievent = + (isc__netievent_tcpdnsstop_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; - uv_timer_init(&dnssock->mgr->workers[isc_nm_tid()].loop, - &dnssock->timer); - dnssock->timer.data = dnssock; - dnssock->timer_initialized = true; - uv_timer_start(&dnssock->timer, dnstcp_readtimeout, - dnssock->read_timeout, 0); - dnssock->timer_running = true; + UNUSED(worker); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + + if (sock->parent != NULL) { + stop_tcpdns_child(sock); + return; + } /* - * Add a reference to handle to keep it from being freed by - * the caller. It will be detached in dnslisted_readcb() when - * the connection is closed or there is no more data to be read. + * If network manager is interlocked, re-enqueue the event for later. */ - isc_nmhandle_attach(handle, &readhandle); - isc_nm_read(readhandle, dnslisten_readcb, dnssock); - isc__nmsocket_detach(&dnssock); + if (!isc__nm_acquire_interlocked(sock->mgr)) { + enqueue_stoplistening(sock); + } else { + stop_tcpdns_parent(sock); + isc__nm_drop_interlocked(sock->mgr); + } +} - return (ISC_R_SUCCESS); +static void +failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(result != ISC_R_SUCCESS); + + stop_reading(sock); + + if (!sock->recv_read) { + goto destroy; + } + sock->recv_read = false; + + if (sock->recv_cb != NULL) { + isc__nm_uvreq_t *req = get_read_req(sock); + isc__nmsocket_clearcb(sock); + isc__nm_readcb(sock, req, result); + } + +destroy: + isc__nmsocket_prep_destroy(sock); + + /* We need to detach from quota after the read callback function had a + * chance to be executed. 
*/ + if (sock->quota) { + isc_quota_detach(&sock->quota); + } +} + +static void +failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, + isc_result_t eresult) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(VALID_UVREQ(req)); + + if (req->cb.send != NULL) { + isc__nm_sendcb(sock, req, eresult); + } else { + isc__nm_uvreq_put(&req, sock); + } +} + +static isc__nm_uvreq_t * +get_read_req(isc_nmsocket_t *sock) { + isc__nm_uvreq_t *req = NULL; + + req = isc__nm_uvreq_get(sock->mgr, sock); + req->cb.recv = sock->recv_cb; + req->cbarg = sock->recv_cbarg; + + if (atomic_load(&sock->client)) { + isc_nmhandle_attach(sock->statichandle, &req->handle); + } else { + req->handle = isc__nmhandle_get(sock, NULL, NULL); + } + + return req; +} + +static void +readtimeout_cb(uv_timer_t *timer) { + isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)timer); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->reading); + + /* + * Timeout; stop reading and process whatever we have. 
+ */ + + failed_read_cb(sock, ISC_R_TIMEDOUT); +} + +static void +start_sock_timer(isc_nmsocket_t *sock) { + if (sock->read_timeout > 0) { + int r = uv_timer_start(&sock->timer, readtimeout_cb, + sock->read_timeout, 0); + REQUIRE(r == 0); + } +} + +static void +stop_sock_timer(isc_nmsocket_t *sock) { + int r = uv_timer_stop(&sock->timer); + REQUIRE(r == 0); +} + +static void +start_reading(isc_nmsocket_t *sock) { + if (sock->reading) { + return; + } + + int r = uv_read_start(&sock->uv_handle.stream, tcpdns_alloc_cb, + read_cb); + REQUIRE(r == 0); + sock->reading = true; + + start_sock_timer(sock); +} + +static void +stop_reading(isc_nmsocket_t *sock) { + if (!sock->reading) { + return; + } + + int r = uv_read_stop(&sock->uv_handle.stream); + REQUIRE(r == 0); + sock->reading = false; + + stop_sock_timer(sock); +} + +void +isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + + isc_nmsocket_t *sock = handle->sock; + isc__netievent_tcpdnsread_t *ievent = NULL; + + REQUIRE(sock->type == isc_nm_tcpdnssocket); + REQUIRE(sock->statichandle == handle); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(!sock->recv_read); + + sock->recv_cb = cb; + sock->recv_cbarg = cbarg; + sock->recv_read = true; + if (sock->read_timeout == 0) { + sock->read_timeout = (atomic_load(&sock->keepalive) + ? sock->mgr->keepalive + : sock->mgr->idle); + } + + ievent = isc__nm_get_netievent_tcpdnsread(sock->mgr, sock); + + /* + * This MUST be done asynchronously, no matter which thread we're + * in. The callback function for isc_nm_read() often calls + * isc_nm_read() again; if we tried to do that synchronously + * we'd clash in processbuffer() and grow the stack indefinitely. + */ + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); + + return; +} + +/*%< + * Allocator for TCP read operations. Limited to size 2^16. 
+ * + * Note this doesn't actually allocate anything, it just assigns the + * worker's receive buffer to a socket, and marks it as "in use". + */ +static void +tcpdns_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) { + isc_nmsocket_t *sock = uv_handle_get_data(handle); + isc__networker_t *worker = NULL; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tcpdnssocket); + REQUIRE(isc__nm_in_netthread()); + UNUSED(size); + + worker = &sock->mgr->workers[sock->tid]; + INSIST(!worker->recvbuf_inuse); + + buf->base = worker->recvbuf; + buf->len = size; + worker->recvbuf_inuse = true; +} + +void +isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tcpdnsread_t *ievent = + (isc__netievent_tcpdnsread_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); + + if (inactive(sock)) { + sock->reading = true; + failed_read_cb(sock, ISC_R_CANCELED); + return; + } + + process_sock_buffer(sock); } /* @@ -178,18 +918,23 @@ dnslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { * The caller will need to unreference the handle. */ static isc_result_t -processbuffer(isc_nmsocket_t *dnssock, isc_nmhandle_t **handlep) { +processbuffer(isc_nmsocket_t *sock) { size_t len; + isc__nm_uvreq_t *req; + isc_nmhandle_t *handle = NULL; - REQUIRE(VALID_NMSOCK(dnssock)); - REQUIRE(dnssock->tid == isc_nm_tid()); - REQUIRE(handlep != NULL && *handlep == NULL); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + + if (inactive(sock)) { + return (ISC_R_CANCELED); + } /* * If we don't even have the length yet, we can't do * anything. */ - if (dnssock->buf_len < 2) { + if (sock->buf_len < 2) { return (ISC_R_NOMORE); } @@ -197,544 +942,514 @@ processbuffer(isc_nmsocket_t *dnssock, isc_nmhandle_t **handlep) { * Process the first packet from the buffer, leaving * the rest (if any) for later. 
*/ - len = dnslen(dnssock->buf); - if (len <= dnssock->buf_len - 2) { - isc_nmhandle_t *dnshandle = NULL; - isc_nmsocket_t *listener = NULL; - isc_nm_recv_cb_t cb = NULL; - void *cbarg = NULL; - - if (atomic_load(&dnssock->client) && - dnssock->statichandle != NULL) { - isc_nmhandle_attach(dnssock->statichandle, &dnshandle); - } else { - dnshandle = isc__nmhandle_get(dnssock, NULL, NULL); - } - - listener = dnssock->listener; - if (listener != NULL) { - cb = listener->recv_cb; - cbarg = listener->recv_cbarg; - } else if (dnssock->recv_cb != NULL) { - cb = dnssock->recv_cb; - cbarg = dnssock->recv_cbarg; - /* - * We need to clear the read callback *before* - * calling it, because it might make another - * call to isc_nm_read() and set up a new callback. - */ - isc__nmsocket_clearcb(dnssock); - } - - if (cb != NULL) { - cb(dnshandle, ISC_R_SUCCESS, - &(isc_region_t){ .base = dnssock->buf + 2, - .length = len }, - cbarg); - } - - len += 2; - dnssock->buf_len -= len; - if (len > 0) { - memmove(dnssock->buf, dnssock->buf + len, - dnssock->buf_len); - } - - *handlep = dnshandle; - return (ISC_R_SUCCESS); + len = ntohs(*(uint16_t *)sock->buf); + if (len > sock->buf_len - 2) { + return (ISC_R_NOMORE); } - return (ISC_R_NOMORE); + req = get_read_req(sock); + REQUIRE(VALID_UVREQ(req)); + + /* + * We need to launch the resume_processing after the buffer has + * been consumed, thus we need to delay the detaching the handle. + */ + isc_nmhandle_attach(req->handle, &handle); + + /* + * The callback will be called synchronously because the + * result is ISC_R_SUCCESS, so we don't need to have + * the buffer on the heap + */ + req->uvbuf.base = (char *)sock->buf + 2; + req->uvbuf.len = len; + + /* + * If isc__nm_tcpdns_read() was called, it will be satisfied by single + * DNS message in the next call. 
+ */ + sock->recv_read = false; + + /* + * The assertion failure here means that there's a errnoneous extra + * nmhandle detach happening in the callback and resume_processing gets + * called while we are still processing the buffer. + */ + REQUIRE(sock->processing == false); + sock->processing = true; + isc__nm_readcb(sock, req, ISC_R_SUCCESS); + sock->processing = false; + + len += 2; + sock->buf_len -= len; + if (len > 0) { + memmove(sock->buf, sock->buf + len, sock->buf_len); + } + + isc_nmhandle_detach(&handle); + + return (ISC_R_SUCCESS); } -/* - * We've got a read on our underlying socket. Check whether - * we have a complete DNS packet and, if so, call the callback. - */ static void -dnslisten_readcb(isc_nmhandle_t *handle, isc_result_t eresult, - isc_region_t *region, void *arg) { - isc_nmsocket_t *dnssock = (isc_nmsocket_t *)arg; - unsigned char *base = NULL; - bool done = false; +read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { + isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream); + uint8_t *base = NULL; size_t len; - REQUIRE(VALID_NMSOCK(dnssock)); - REQUIRE(dnssock->tid == isc_nm_tid()); - REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->reading); + REQUIRE(buf != NULL); - if (!isc__nmsocket_active(dnssock) || atomic_load(&dnssock->closing) || - dnssock->outerhandle == NULL || - (dnssock->listener != NULL && - !isc__nmsocket_active(dnssock->listener)) || - atomic_load(&dnssock->mgr->closing)) - { - if (eresult == ISC_R_SUCCESS) { - eresult = ISC_R_CANCELED; - } + if (inactive(sock)) { + failed_read_cb(sock, ISC_R_CANCELED); + goto free; } - if (region == NULL || eresult != ISC_R_SUCCESS) { - isc_nm_recv_cb_t cb = dnssock->recv_cb; - void *cbarg = dnssock->recv_cbarg; - - /* Connection closed */ - atomic_store(&dnssock->result, eresult); - isc__nmsocket_clearcb(dnssock); - if (atomic_load(&dnssock->client) && cb != NULL) { - cb(dnssock->statichandle, 
eresult, NULL, cbarg); + if (nread < 0) { + if (nread != UV_EOF) { + isc__nm_incstats(sock->mgr, + sock->statsindex[STATID_RECVFAIL]); } - if (dnssock->self != NULL) { - isc__nmsocket_detach(&dnssock->self); - } - if (dnssock->outerhandle != NULL) { - isc__nmsocket_clearcb(dnssock->outerhandle->sock); - isc_nmhandle_detach(&dnssock->outerhandle); - } - if (dnssock->listener != NULL) { - isc__nmsocket_detach(&dnssock->listener); - } + failed_read_cb(sock, isc__nm_uverr2result(nread)); - /* - * Server connections will hold two handle references when - * shut down, but client (tcpdnsconnect) connections have - * only one. - */ - if (!atomic_load(&dnssock->client)) { - isc_nmhandle_detach(&handle); - } - return; + goto free; } - base = region->base; - len = region->length; + base = (uint8_t *)buf->base; + len = nread; - if (dnssock->buf_len + len > dnssock->buf_size) { - alloc_dnsbuf(dnssock, dnssock->buf_len + len); + /* + * FIXME: We can avoid the memmove here if we know we have received full + * packet; e.g. we should be smarter, a.s. there are just few situations + * + * The tcp_alloc_buf should be smarter and point the uv_read_start to + * the position where previous read has ended in the sock->buf, that way + * the data could be read directly into sock->buf. + */ + + if (sock->buf_len + len > sock->buf_size) { + alloc_dnsbuf(sock, sock->buf_len + len); } - memmove(dnssock->buf + dnssock->buf_len, base, len); - dnssock->buf_len += len; + memmove(sock->buf + sock->buf_len, base, len); + sock->buf_len += len; - dnssock->read_timeout = (atomic_load(&dnssock->keepalive) - ? 
dnssock->mgr->keepalive - : dnssock->mgr->idle); + if (!atomic_load(&sock->client)) { + sock->read_timeout = sock->mgr->idle; + } - do { - isc_result_t result; - isc_nmhandle_t *dnshandle = NULL; + process_sock_buffer(sock); +free: + isc__nm_free_uvbuf(sock, buf); +} - result = processbuffer(dnssock, &dnshandle); - if (result != ISC_R_SUCCESS) { - /* - * There wasn't anything in the buffer to process. - */ - return; - } +static void +quota_accept_cb(isc_quota_t *quota, void *sock0) { + isc_nmsocket_t *sock = (isc_nmsocket_t *)sock0; + isc__netievent_tcpdnsaccept_t *ievent = NULL; - /* - * We have a packet: stop timeout timers - */ - atomic_store(&dnssock->outerhandle->sock->processing, true); - if (dnssock->timer_initialized) { - uv_timer_stop(&dnssock->timer); - } + REQUIRE(VALID_NMSOCK(sock)); - if (atomic_load(&dnssock->sequential) || - dnssock->recv_cb == NULL) { - /* - * There are two reasons we might want to pause here: - * - We're in sequential mode and we've received - * a whole packet, so we're done until it's been - * processed; or - * - We no longer have a read callback. - */ - isc_nm_pauseread(dnssock->outerhandle); - done = true; - } else { - /* - * We're pipelining, so we now resume processing - * packets until the clients-per-connection limit - * is reached (as determined by the number of - * active handles on the socket). When the limit - * is reached, pause reading. - */ - if (atomic_load(&dnssock->ah) >= - TCPDNS_CLIENTS_PER_CONN) { - isc_nm_pauseread(dnssock->outerhandle); - done = true; - } - } - - isc_nmhandle_detach(&dnshandle); - } while (!done); + /* + * Create a tcpdnsaccept event and pass it using the async channel. 
+ */ + ievent = isc__nm_get_netievent_tcpdnsaccept(sock->mgr, sock, quota); + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); } /* - * isc_nm_listentcpdns listens for connections and accepts - * them immediately, then calls the cb for each incoming DNS packet - * (with 2-byte length stripped) - just like for UDP packet. + * This is called after we get a quota_accept_cb() callback. */ -isc_result_t -isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, - void *cbarg, isc_nm_accept_cb_t accept_cb, - void *accept_cbarg, size_t extrahandlesize, int backlog, - isc_quota_t *quota, isc_nmsocket_t **sockp) { - isc_nmsocket_t *dnslistensock = isc_mem_get(mgr->mctx, - sizeof(*dnslistensock)); - isc_result_t result; - - REQUIRE(VALID_NM(mgr)); - - isc__nmsocket_init(dnslistensock, mgr, isc_nm_tcpdnslistener, iface); - dnslistensock->recv_cb = cb; - dnslistensock->recv_cbarg = cbarg; - dnslistensock->accept_cb = accept_cb; - dnslistensock->accept_cbarg = accept_cbarg; - dnslistensock->extrahandlesize = extrahandlesize; - - result = isc_nm_listentcp(mgr, iface, dnslisten_acceptcb, dnslistensock, - extrahandlesize, backlog, quota, - &dnslistensock->outer); - if (result == ISC_R_SUCCESS) { - atomic_store(&dnslistensock->listening, true); - *sockp = dnslistensock; - return (ISC_R_SUCCESS); - } else { - atomic_store(&dnslistensock->closed, true); - isc__nmsocket_detach(&dnslistensock); - return (result); - } -} - -/* - * isc_nm_listentlsdns works exactly as listentcpdns but on an SSL socket. 
- */ -isc_result_t -isc_nm_listentlsdns(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, - void *cbarg, isc_nm_accept_cb_t accept_cb, - void *accept_cbarg, size_t extrahandlesize, int backlog, - isc_quota_t *quota, SSL_CTX *sslctx, - isc_nmsocket_t **sockp) { - isc_nmsocket_t *dnslistensock = isc_mem_get(mgr->mctx, - sizeof(*dnslistensock)); - isc_result_t result; - - REQUIRE(VALID_NM(mgr)); - REQUIRE(sslctx != NULL); - - isc__nmsocket_init(dnslistensock, mgr, isc_nm_tcpdnslistener, iface); - dnslistensock->recv_cb = cb; - dnslistensock->recv_cbarg = cbarg; - dnslistensock->accept_cb = accept_cb; - dnslistensock->accept_cbarg = accept_cbarg; - dnslistensock->extrahandlesize = extrahandlesize; - - result = isc_nm_listentls(mgr, iface, dnslisten_acceptcb, dnslistensock, - extrahandlesize, backlog, quota, sslctx, - &dnslistensock->outer); - if (result == ISC_R_SUCCESS) { - atomic_store(&dnslistensock->listening, true); - *sockp = dnslistensock; - return (ISC_R_SUCCESS); - } else { - atomic_store(&dnslistensock->closed, true); - isc__nmsocket_detach(&dnslistensock); - return (result); - } -} - void -isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0) { - isc__netievent_tcpstop_t *ievent = (isc__netievent_tcpdnsstop_t *)ev0; +isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tcpdnsaccept_t *ievent = + (isc__netievent_tcpdnsaccept_t *)ev0; isc_nmsocket_t *sock = ievent->sock; + isc_result_t result; UNUSED(worker); - REQUIRE(isc__nm_in_netthread()); REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(sock->type == isc_nm_tcpdnslistener); REQUIRE(sock->tid == isc_nm_tid()); - atomic_store(&sock->listening, false); - atomic_store(&sock->closed, true); - - isc__nmsocket_clearcb(sock); - - if (sock->outer != NULL) { - switch (sock->outer->type) { - case isc_nm_tcplistener: - isc__nm_tcp_stoplistening(sock->outer); - break; - case isc_nm_tlslistener: - isc__nm_tls_stoplistening(sock->outer); - break; - default: - 
INSIST(0); - ISC_UNREACHABLE(); + result = accept_connection(sock, ievent->quota); + if (result != ISC_R_SUCCESS && result != ISC_R_NOCONN) { + if ((result != ISC_R_QUOTA && result != ISC_R_SOFTQUOTA) || + can_log_tcpdns_quota()) + { + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR, + "TCP connection failed: %s", + isc_result_totext(result)); } - isc__nmsocket_detach(&sock->outer); } +} + +static isc_result_t +accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) { + isc_nmsocket_t *csock = NULL; + isc__networker_t *worker = NULL; + int r; + isc_result_t result; + struct sockaddr_storage peer_ss; + struct sockaddr_storage local_ss; + isc_sockaddr_t local; + isc_nmhandle_t *handle; + + REQUIRE(VALID_NMSOCK(ssock)); + REQUIRE(ssock->tid == isc_nm_tid()); + + if (inactive(ssock)) { + if (quota != NULL) { + isc_quota_detach("a); + } + return (ISC_R_CANCELED); + } + + REQUIRE(ssock->accept_cb != NULL); + + csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t)); + isc__nmsocket_init(csock, ssock->mgr, isc_nm_tcpdnssocket, + ssock->iface); + csock->tid = ssock->tid; + csock->extrahandlesize = ssock->extrahandlesize; + isc__nmsocket_attach(ssock, &csock->server); + csock->recv_cb = ssock->recv_cb; + csock->recv_cbarg = ssock->recv_cbarg; + csock->quota = quota; + csock->accepting = true; + + worker = &csock->mgr->workers[csock->tid]; + + r = uv_tcp_init(&worker->loop, &csock->uv_handle.tcp); + RUNTIME_CHECK(r == 0); + uv_handle_set_data(&csock->uv_handle.handle, csock); + + r = uv_timer_init(&worker->loop, &csock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&csock->timer, csock); + + r = uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream); + if (r != 0) { + result = isc__nm_uverr2result(r); + goto failure; + } + + r = uv_tcp_getpeername(&csock->uv_handle.tcp, + (struct sockaddr *)&peer_ss, + &(int){ sizeof(peer_ss) }); + if (r != 0) { + result = isc__nm_uverr2result(r); + goto failure; 
+ } + + result = isc_sockaddr_fromsockaddr(&csock->peer, + (struct sockaddr *)&peer_ss); + if (result != ISC_R_SUCCESS) { + goto failure; + } + + r = uv_tcp_getsockname(&csock->uv_handle.tcp, + (struct sockaddr *)&local_ss, + &(int){ sizeof(local_ss) }); + if (r != 0) { + result = isc__nm_uverr2result(r); + goto failure; + } + + result = isc_sockaddr_fromsockaddr(&local, + (struct sockaddr *)&local_ss); + if (result != ISC_R_SUCCESS) { + goto failure; + } + + /* + * The handle will be either detached on acceptcb failure or in the + * readcb. + */ + handle = isc__nmhandle_get(csock, NULL, &local); + + result = ssock->accept_cb(handle, ISC_R_SUCCESS, ssock->accept_cbarg); + if (result != ISC_R_SUCCESS) { + isc_nmhandle_detach(&handle); + goto failure; + } + + csock->accepting = false; + + isc__nm_incstats(csock->mgr, csock->statsindex[STATID_ACCEPT]); + + csock->read_timeout = csock->mgr->init; + + csock->closehandle_cb = resume_processing; + + /* + * We need to keep the handle alive until we fail to read or connection + * is closed by the other side, it will be detached via + * prep_destroy()->tcpdns_close_direct(). + */ + isc_nmhandle_attach(handle, &csock->recv_handle); + start_reading(csock); + + /* + * The initial timer has been set, update the read timeout for the next + * reads. + */ + csock->read_timeout = (atomic_load(&csock->keepalive) + ? csock->mgr->keepalive + : csock->mgr->idle); + + isc_nmhandle_detach(&handle); + + /* + * sock is now attached to the handle. 
+ */ + isc__nmsocket_detach(&csock); + + return (ISC_R_SUCCESS); + +failure: + + atomic_store(&csock->active, false); + + failed_accept_cb(csock, result); + + isc__nmsocket_prep_destroy(csock); + + isc__nmsocket_detach(&csock); + + return (result); +} + +void +isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + + isc_nmsocket_t *sock = handle->sock; + isc__netievent_tcpdnssend_t *ievent = NULL; + isc__nm_uvreq_t *uvreq = NULL; + + REQUIRE(sock->type == isc_nm_tcpdnssocket); + + uvreq = isc__nm_uvreq_get(sock->mgr, sock); + uvreq->uvbuf.base = (char *)region->base; + uvreq->uvbuf.len = region->length; + + isc_nmhandle_attach(handle, &uvreq->handle); + + uvreq->cb.send = cb; + uvreq->cbarg = cbarg; + + ievent = isc__nm_get_netievent_tcpdnssend(sock->mgr, sock, uvreq); + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); + + return; +} + +static void +tcpdns_send_cb(uv_write_t *req, int status) { + isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data; + isc_nmsocket_t *sock = uvreq->sock; + + REQUIRE(VALID_UVREQ(uvreq)); + REQUIRE(VALID_NMHANDLE(uvreq->handle)); + + if (status < 0) { + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]); + failed_send_cb(sock, uvreq, isc__nm_uverr2result(status)); + return; + } + + isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS); +} + +/* + * Handle 'tcpsend' async event - send a packet on the socket + */ +void +isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0) { + isc_result_t result; + isc__netievent_tcpdnssend_t *ievent = + (isc__netievent_tcpdnssend_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + isc__nm_uvreq_t *uvreq = ievent->req; + + REQUIRE(sock->type == isc_nm_tcpdnssocket); + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); + + result = tcpdns_send_direct(sock, uvreq); + if (result != ISC_R_SUCCESS) { + 
isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]); + failed_send_cb(sock, uvreq, result); + } +} + +static isc_result_t +tcpdns_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(VALID_UVREQ(req)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->type == isc_nm_tcpdnssocket); + + int r; + uint16_t len = htons(req->uvbuf.len); + uv_buf_t bufs[2] = { { .base = (char *)&len, .len = 2 }, + { .base = req->uvbuf.base, + .len = req->uvbuf.len } }; + + if (inactive(sock)) { + return (ISC_R_CANCELED); + } + + r = uv_write(&req->uv_req.write, &sock->uv_handle.stream, bufs, 2, + tcpdns_send_cb); + if (r < 0) { + return (isc__nm_uverr2result(r)); + } + + return (ISC_R_SUCCESS); +} + +static void +tcpdns_stop_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = uv_handle_get_data(handle); + uv_handle_set_data(handle, NULL); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->closing)); + + if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false }, + true)) { + INSIST(0); + ISC_UNREACHABLE(); + } + + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]); + + atomic_store(&sock->listening, false); isc__nmsocket_detach(&sock); } -void -isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock) { +static void +tcpdns_close_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = uv_handle_get_data(handle); + uv_handle_set_data(handle, NULL); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->closing)); + + if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false }, + true)) { + INSIST(0); + ISC_UNREACHABLE(); + } + + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]); + + if (sock->server != NULL) { + isc__nmsocket_detach(&sock->server); + } + + atomic_store(&sock->connected, false); + + isc__nmsocket_prep_destroy(sock); +} + +static void +timer_close_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = 
uv_handle_get_data(handle); + uv_handle_set_data(handle, NULL); + + if (sock->parent) { + uv_close(&sock->uv_handle.handle, tcpdns_stop_cb); + } else { + uv_close(&sock->uv_handle.handle, tcpdns_close_cb); + } +} + +static void +stop_tcpdns_child(isc_nmsocket_t *sock) { + REQUIRE(sock->type == isc_nm_tcpdnssocket); + REQUIRE(sock->tid == isc_nm_tid()); + + if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false }, + true)) { + return; + } + + tcpdns_close_direct(sock); + + LOCK(&sock->parent->lock); + sock->parent->rchildren -= 1; + UNLOCK(&sock->parent->lock); + BROADCAST(&sock->parent->cond); +} + +static void +stop_tcpdns_parent(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->type == isc_nm_tcpdnslistener); - isc__netievent_tcpdnsstop_t *ievent = - isc__nm_get_ievent(sock->mgr, netievent_tcpdnsstop); - isc__nmsocket_attach(sock, &ievent->sock); - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); -} + for (int i = 0; i < sock->nchildren; i++) { + isc__netievent_tcpdnsstop_t *ievent = NULL; + isc_nmsocket_t *csock = &sock->children[i]; + REQUIRE(VALID_NMSOCK(csock)); -void -isc_nm_tcpdns_sequential(isc_nmhandle_t *handle) { - REQUIRE(VALID_NMHANDLE(handle)); + atomic_store(&csock->active, false); - if (handle->sock->type != isc_nm_tcpdnssocket || - handle->sock->outerhandle == NULL) - { - return; - } - - /* - * We don't want pipelining on this connection. That means - * that we need to pause after reading each request, and - * resume only after the request has been processed. This - * is done in resume_processing(), which is the socket's - * closehandle_cb callback, called whenever a handle - * is released. 
- */ - isc_nm_pauseread(handle->sock->outerhandle); - atomic_store(&handle->sock->sequential, true); -} - -void -isc_nm_tcpdns_keepalive(isc_nmhandle_t *handle, bool value) { - REQUIRE(VALID_NMHANDLE(handle)); - - if (handle->sock->type != isc_nm_tcpdnssocket || - handle->sock->outerhandle == NULL) - { - return; - } - - atomic_store(&handle->sock->keepalive, value); - atomic_store(&handle->sock->outerhandle->sock->keepalive, value); -} - -static void -resume_processing(void *arg) { - isc_nmsocket_t *sock = (isc_nmsocket_t *)arg; - isc_result_t result; - - REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(sock->tid == isc_nm_tid()); - - if (sock->type != isc_nm_tcpdnssocket || sock->outerhandle == NULL) { - return; - } - - if (atomic_load(&sock->ah) == 0) { - /* Nothing is active; sockets can timeout now */ - atomic_store(&sock->outerhandle->sock->processing, false); - if (sock->timer_initialized) { - uv_timer_start(&sock->timer, dnstcp_readtimeout, - sock->read_timeout, 0); - sock->timer_running = true; - } - } - - /* - * For sequential sockets: Process what's in the buffer, or - * if there aren't any messages buffered, resume reading. - */ - if (atomic_load(&sock->sequential)) { - isc_nmhandle_t *handle = NULL; - - result = processbuffer(sock, &handle); - if (result == ISC_R_SUCCESS) { - atomic_store(&sock->outerhandle->sock->processing, - true); - if (sock->timer_initialized) { - uv_timer_stop(&sock->timer); - } - isc_nmhandle_detach(&handle); - } else if (sock->outerhandle != NULL) { - isc_nm_resumeread(sock->outerhandle); + if (csock->tid == isc_nm_tid()) { + stop_tcpdns_child(csock); + continue; } - return; + ievent = isc__nm_get_netievent_tcpdnsstop(sock->mgr, csock); + isc__nm_enqueue_ievent(&sock->mgr->workers[csock->tid], + (isc__netievent_t *)ievent); } - /* - * For pipelined sockets: If we're under the clients-per-connection - * limit, resume processing until we reach the limit again. 
- */ - do { - isc_nmhandle_t *dnshandle = NULL; - - result = processbuffer(sock, &dnshandle); - if (result != ISC_R_SUCCESS) { - /* - * Nothing in the buffer; resume reading. - */ - if (sock->outerhandle != NULL) { - isc_nm_resumeread(sock->outerhandle); - } - - break; - } - - if (sock->timer_initialized) { - uv_timer_stop(&sock->timer); - } - atomic_store(&sock->outerhandle->sock->processing, true); - isc_nmhandle_detach(&dnshandle); - } while (atomic_load(&sock->ah) < TCPDNS_CLIENTS_PER_CONN); -} - -static void -tcpdnssend_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { - isc__nm_uvreq_t *req = (isc__nm_uvreq_t *)cbarg; - REQUIRE(VALID_UVREQ(req)); - - UNUSED(handle); - - req->cb.send(req->handle, result, req->cbarg); - isc_mem_put(req->sock->mgr->mctx, req->uvbuf.base, req->uvbuf.len); - isc__nm_uvreq_put(&req, req->handle->sock); - isc_nmhandle_detach(&handle); -} - -/* - * The socket is closing, outerhandle has been detached, listener is - * inactive, or the netmgr is closing: any operation on it should abort - * with ISC_R_CANCELED. 
- */ -static bool -inactive(isc_nmsocket_t *sock) { - return (!isc__nmsocket_active(sock) || atomic_load(&sock->closing) || - sock->outerhandle == NULL || - (sock->listener != NULL && - !isc__nmsocket_active(sock->listener)) || - atomic_load(&sock->mgr->closing)); -} - -void -isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0) { - isc__netievent_tcpdnssend_t *ievent = - (isc__netievent_tcpdnssend_t *)ev0; - isc__nm_uvreq_t *req = ievent->req; - isc_nmsocket_t *sock = ievent->sock; - isc_nmhandle_t *sendhandle = NULL; - isc_region_t r; - - REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(VALID_UVREQ(req)); - REQUIRE(worker->id == sock->tid); - REQUIRE(sock->tid == isc_nm_tid()); - REQUIRE(sock->type == isc_nm_tcpdnssocket); - - if (inactive(sock)) { - req->cb.send(req->handle, ISC_R_CANCELED, req->cbarg); - isc_mem_put(sock->mgr->mctx, req->uvbuf.base, req->uvbuf.len); - isc__nm_uvreq_put(&req, req->handle->sock); - return; + LOCK(&sock->lock); + while (sock->rchildren > 0) { + WAIT(&sock->cond, &sock->lock); } + atomic_store(&sock->closed, true); + UNLOCK(&sock->lock); - r.base = (unsigned char *)req->uvbuf.base; - r.length = req->uvbuf.len; - isc_nmhandle_attach(sock->outerhandle, &sendhandle); - isc_nm_send(sendhandle, &r, tcpdnssend_cb, req); -} - -/* - * isc__nm_tcp_send sends buf to a peer on a socket. 
- */ -void -isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, - isc_nm_cb_t cb, void *cbarg) { - isc__nm_uvreq_t *uvreq = NULL; - - REQUIRE(VALID_NMHANDLE(handle)); - - isc_nmsocket_t *sock = handle->sock; - - REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(sock->type == isc_nm_tcpdnssocket); - - if (inactive(sock)) { - cb(handle, ISC_R_CANCELED, cbarg); - return; - } - - uvreq = isc__nm_uvreq_get(sock->mgr, sock); - isc_nmhandle_attach(handle, &uvreq->handle); - uvreq->cb.send = cb; - uvreq->cbarg = cbarg; - - uvreq->uvbuf.base = isc_mem_get(sock->mgr->mctx, region->length + 2); - uvreq->uvbuf.len = region->length + 2; - *(uint16_t *)uvreq->uvbuf.base = htons(region->length); - memmove(uvreq->uvbuf.base + 2, region->base, region->length); - - isc__netievent_tcpdnssend_t *ievent = NULL; - - ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpdnssend); - ievent->req = uvreq; - ievent->sock = sock; - - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); + isc__nmsocket_prep_destroy(sock); } static void tcpdns_close_direct(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->closing)); - if (sock->timer_running) { - uv_timer_stop(&sock->timer); - sock->timer_running = false; + if (sock->quota != NULL) { + isc_quota_detach(&sock->quota); } - /* We don't need atomics here, it's all in single network thread */ - if (sock->self != NULL) { - isc__nmsocket_detach(&sock->self); - } else if (sock->timer_initialized) { - /* - * We need to fire the timer callback to clean it up, - * it will then call us again (via detach) so that we - * can finally close the socket. - */ - sock->timer_initialized = false; - uv_timer_stop(&sock->timer); - uv_close((uv_handle_t *)&sock->timer, timer_close_cb); - } else { - /* - * At this point we're certain that there are no external - * references, we can close everything. 
- */ - if (sock->outerhandle != NULL) { - isc__nmsocket_clearcb(sock->outerhandle->sock); - isc_nmhandle_detach(&sock->outerhandle); - } - if (sock->listener != NULL) { - isc__nmsocket_detach(&sock->listener); - } - atomic_store(&sock->closed, true); - isc__nmsocket_prep_destroy(sock); + if (sock->recv_handle != NULL) { + isc_nmhandle_detach(&sock->recv_handle); } + + stop_reading(sock); + uv_close((uv_handle_t *)&sock->timer, timer_close_cb); } void isc__nm_tcpdns_close(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->type == isc_nm_tcpdnssocket); + REQUIRE(!isc__nmsocket_active(sock)); if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false }, true)) { @@ -744,10 +1459,12 @@ isc__nm_tcpdns_close(isc_nmsocket_t *sock) { if (sock->tid == isc_nm_tid()) { tcpdns_close_direct(sock); } else { + /* + * We need to create an event and pass it using async channel + */ isc__netievent_tcpdnsclose_t *ievent = - isc__nm_get_ievent(sock->mgr, netievent_tcpdnsclose); + isc__nm_get_netievent_tcpdnsclose(sock->mgr, sock); - ievent->sock = sock; isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -764,221 +1481,38 @@ isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0) { UNUSED(worker); - tcpdns_close_direct(ievent->sock); -} - -typedef struct { - isc_mem_t *mctx; - isc_nm_cb_t cb; - void *cbarg; - size_t extrahandlesize; -} tcpconnect_t; - -static void -tcpdnsconnect_cb(isc_nmhandle_t *handle, isc_result_t result, void *arg) { - tcpconnect_t *conn = (tcpconnect_t *)arg; - isc_nm_cb_t cb = conn->cb; - void *cbarg = conn->cbarg; - size_t extrahandlesize = conn->extrahandlesize; - isc_nmsocket_t *dnssock = NULL; - isc_nmhandle_t *readhandle = NULL; - - REQUIRE(result != ISC_R_SUCCESS || VALID_NMHANDLE(handle)); - - isc_mem_putanddetach(&conn->mctx, conn, sizeof(*conn)); - - dnssock = isc_mem_get(handle->sock->mgr->mctx, sizeof(*dnssock)); - isc__nmsocket_init(dnssock, handle->sock->mgr, 
isc_nm_tcpdnssocket, - handle->sock->iface); - - dnssock->extrahandlesize = extrahandlesize; - isc_nmhandle_attach(handle, &dnssock->outerhandle); - - dnssock->peer = handle->sock->peer; - dnssock->read_timeout = handle->sock->mgr->init; - dnssock->tid = isc_nm_tid(); - - atomic_init(&dnssock->client, true); - - readhandle = isc__nmhandle_get(dnssock, NULL, NULL); - - if (result != ISC_R_SUCCESS) { - cb(readhandle, result, cbarg); - isc__nmsocket_detach(&dnssock); - isc_nmhandle_detach(&readhandle); - return; - } - - INSIST(dnssock->statichandle != NULL); - INSIST(dnssock->statichandle == readhandle); - INSIST(readhandle->sock == dnssock); - INSIST(dnssock->recv_cb == NULL); - - uv_timer_init(&dnssock->mgr->workers[isc_nm_tid()].loop, - &dnssock->timer); - dnssock->timer.data = dnssock; - dnssock->timer_initialized = true; - uv_timer_start(&dnssock->timer, dnstcp_readtimeout, - dnssock->read_timeout, 0); - dnssock->timer_running = true; - - /* - * The connection is now established; we start reading immediately, - * before we've been asked to. We'll read and buffer at most one - * packet. - */ - isc_nm_read(handle, dnslisten_readcb, dnssock); - cb(readhandle, ISC_R_SUCCESS, cbarg); - - /* - * The sock is now attached to the handle. 
- */ - isc__nmsocket_detach(&dnssock); -} - -isc_result_t -isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, - isc_nm_cb_t cb, void *cbarg, unsigned int timeout, - size_t extrahandlesize) { - isc_result_t result = ISC_R_SUCCESS; - tcpconnect_t *conn = isc_mem_get(mgr->mctx, sizeof(*conn)); - - *conn = (tcpconnect_t){ .cb = cb, - .cbarg = cbarg, - .extrahandlesize = extrahandlesize }; - isc_mem_attach(mgr->mctx, &conn->mctx); - result = isc_nm_tcpconnect(mgr, local, peer, tcpdnsconnect_cb, conn, - timeout, 0); - if (result != ISC_R_SUCCESS) { - isc_mem_putanddetach(&conn->mctx, conn, sizeof(*conn)); - } - return (result); -} - -isc_result_t -isc_nm_tlsdnsconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, - isc_nm_cb_t cb, void *cbarg, unsigned int timeout, - size_t extrahandlesize) { - isc_result_t result = ISC_R_SUCCESS; - tcpconnect_t *conn = isc_mem_get(mgr->mctx, sizeof(tcpconnect_t)); - SSL_CTX *ctx = NULL; - - *conn = (tcpconnect_t){ .cb = cb, - .cbarg = cbarg, - .extrahandlesize = extrahandlesize }; - isc_mem_attach(mgr->mctx, &conn->mctx); - - ctx = SSL_CTX_new(SSLv23_client_method()); - result = isc_nm_tlsconnect(mgr, local, peer, tcpdnsconnect_cb, conn, - ctx, timeout, 0); - SSL_CTX_free(ctx); - if (result != ISC_R_SUCCESS) { - isc_mem_putanddetach(&conn->mctx, conn, sizeof(*conn)); - } - return (result); + tcpdns_close_direct(sock); } void -isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { - isc_nmsocket_t *sock = NULL; - isc__netievent_tcpdnsread_t *ievent = NULL; - isc_nmhandle_t *eventhandle = NULL; - - REQUIRE(VALID_NMHANDLE(handle)); - - sock = handle->sock; - - REQUIRE(sock->statichandle == handle); +isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(sock->recv_cb == NULL); REQUIRE(sock->tid == isc_nm_tid()); - REQUIRE(atomic_load(&sock->client)); + REQUIRE(sock->type == isc_nm_tcpdnssocket); - if (inactive(sock)) { - cb(handle, 
ISC_R_NOTCONNECTED, NULL, cbarg); + /* + * If the socket is active, mark it inactive and + * continue. If it isn't active, stop now. + */ + if (!isc__nmsocket_deactivate(sock)) { + return; + } + + if (atomic_load(&sock->connecting) || sock->accepting) { + return; + } + + if (sock->statichandle) { + failed_read_cb(sock, ISC_R_CANCELED); return; } /* - * This MUST be done asynchronously, no matter which thread we're - * in. The callback function for isc_nm_read() often calls - * isc_nm_read() again; if we tried to do that synchronously - * we'd clash in processbuffer() and grow the stack indefinitely. + * Otherwise, we just send the socket to abyss... */ - ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpdnsread); - ievent->sock = sock; - - sock->recv_cb = cb; - sock->recv_cbarg = cbarg; - - sock->read_timeout = (atomic_load(&sock->keepalive) - ? sock->mgr->keepalive - : sock->mgr->idle); - - /* - * Add a reference to the handle to keep it from being freed by - * the caller; it will be detached in in isc__nm_async_tcpdnsread(). - */ - isc_nmhandle_attach(handle, &eventhandle); - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); -} - -void -isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0) { - isc_result_t result; - isc__netievent_tcpdnsread_t *ievent = - (isc__netievent_tcpdnsclose_t *)ev0; - isc_nmsocket_t *sock = ievent->sock; - isc_nmhandle_t *handle = NULL, *newhandle = NULL; - - REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(worker->id == sock->tid); - REQUIRE(sock->tid == isc_nm_tid()); - - handle = sock->statichandle; - - if (inactive(sock)) { - isc_nm_recv_cb_t cb = sock->recv_cb; - void *cbarg = sock->recv_cbarg; - - isc__nmsocket_clearcb(sock); - if (cb != NULL) { - cb(handle, ISC_R_NOTCONNECTED, NULL, cbarg); - } - - isc_nmhandle_detach(&handle); - return; + if (sock->parent == NULL) { + isc__nmsocket_prep_destroy(sock); } - - /* - * Maybe we have a packet already? 
- */ - result = processbuffer(sock, &newhandle); - if (result == ISC_R_SUCCESS) { - atomic_store(&sock->outerhandle->sock->processing, true); - if (sock->timer_initialized) { - uv_timer_stop(&sock->timer); - } - isc_nmhandle_detach(&newhandle); - } else if (sock->outerhandle != NULL) { - /* Restart reading, wait for the callback */ - atomic_store(&sock->outerhandle->sock->processing, false); - if (sock->timer_initialized) { - uv_timer_start(&sock->timer, dnstcp_readtimeout, - sock->read_timeout, 0); - sock->timer_running = true; - } - isc_nm_resumeread(sock->outerhandle); - } else { - isc_nm_recv_cb_t cb = sock->recv_cb; - void *cbarg = sock->recv_cbarg; - - isc__nmsocket_clearcb(sock); - cb(handle, ISC_R_NOTCONNECTED, NULL, cbarg); - } - - isc_nmhandle_detach(&handle); } void @@ -990,11 +1524,10 @@ isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle) { sock = handle->sock; + REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->type == isc_nm_tcpdnssocket); - ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpdnscancel); - ievent->sock = sock; - isc_nmhandle_attach(handle, &ievent->handle); + ievent = isc__nm_get_netievent_tcpdnscancel(sock->mgr, sock, handle); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -1004,45 +1537,117 @@ isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_tcpdnscancel_t *ievent = (isc__netievent_tcpdnscancel_t *)ev0; isc_nmsocket_t *sock = ievent->sock; - isc_nmhandle_t *handle = ievent->handle; REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(worker->id == sock->tid); REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); - if (atomic_load(&sock->client)) { - isc_nm_recv_cb_t cb; - void *cbarg = NULL; - - cb = sock->recv_cb; - cbarg = sock->recv_cbarg; - isc__nmsocket_clearcb(sock); - - if (cb != NULL) { - cb(handle, ISC_R_EOF, NULL, cbarg); - } - - isc__nm_tcp_cancelread(sock->outerhandle); - } - - isc_nmhandle_detach(&handle); + failed_read_cb(sock, ISC_R_EOF); } void 
isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout) { - isc_nmsocket_t *sock = NULL; - REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); - sock = handle->sock; - - if (sock->outerhandle != NULL) { - isc__nm_tcp_settimeout(sock->outerhandle, timeout); - } + isc_nmsocket_t *sock = handle->sock; sock->read_timeout = timeout; - if (sock->timer_running) { - uv_timer_start(&sock->timer, dnstcp_readtimeout, - sock->read_timeout, 0); + if (uv_is_active((uv_handle_t *)&sock->timer)) { + start_sock_timer(sock); } } + +void +isc_nm_tcpdns_sequential(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + REQUIRE(handle->sock->type == isc_nm_tcpdnssocket); + + isc_nmsocket_t *sock = handle->sock; + + /* + * We don't want pipelining on this connection. That means + * that we need to pause after reading each request, and + * resume only after the request has been processed. This + * is done in resume_processing(), which is the socket's + * closehandle_cb callback, called whenever a handle + * is released. + */ + + stop_reading(sock); + atomic_store(&sock->sequential, true); +} + +void +isc_nm_tcpdns_keepalive(isc_nmhandle_t *handle, bool value) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + REQUIRE(handle->sock->type != isc_nm_tcpdnssocket); + + isc_nmsocket_t *sock = handle->sock; + + atomic_store(&sock->keepalive, value); +} + +static void +process_sock_buffer(isc_nmsocket_t *sock) { + /* + * 1. When process_buffer receives incomplete DNS message, + * we don't touch any timers + * + * 2. When we receive at least one full DNS message, we stop the timers + * until resume_processing calls this function again and restarts the + * reading and the timers + */ + + /* + * Process a DNS messages. 
Stop if this is client socket, or the server + * socket has been set to sequential mode or the number of queries we + * are processing simultaneously have reached the clients-per-connection + * limit. + */ + for (;;) { + isc_result_t result = processbuffer(sock); + switch (result) { + case ISC_R_NOMORE: + start_reading(sock); + return; + case ISC_R_CANCELED: + stop_reading(sock); + return; + case ISC_R_SUCCESS: + if (atomic_load(&sock->client) || + atomic_load(&sock->sequential) || + atomic_load(&sock->ah) >= TCPDNS_CLIENTS_PER_CONN) + { + stop_reading(sock); + return; + } + break; + default: + INSIST(0); + } + } +} + +static void +resume_processing(void *arg) { + isc_nmsocket_t *sock = (isc_nmsocket_t *)arg; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->type == isc_nm_tcpdnssocket); + REQUIRE(!atomic_load(&sock->client)); + + if (inactive(sock)) { + return; + } + + if (atomic_load(&sock->ah) == 0) { + /* Nothing is active; sockets can timeout now */ + start_sock_timer(sock); + } + + process_sock_buffer(sock); +} diff --git a/lib/isc/netmgr/tls.c b/lib/isc/netmgr/tls.c index fdface5aa2..1d80abcf47 100644 --- a/lib/isc/netmgr/tls.c +++ b/lib/isc/netmgr/tls.c @@ -91,8 +91,7 @@ tls_senddone(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { static void async_tls_do_bio(isc_nmsocket_t *sock) { isc__netievent_tlsdobio_t *ievent = - isc__nm_get_ievent(sock->mgr, netievent_tlsdobio); - ievent->sock = sock; + isc__nm_get_netievent_tlsdobio(sock->mgr, sock); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -314,10 +313,11 @@ initialize_tls(isc_nmsocket_t *sock, bool server) { static isc_result_t tlslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + REQUIRE(VALID_NMSOCK(cbarg)); isc_nmsocket_t *tlslistensock = (isc_nmsocket_t *)cbarg; isc_nmsocket_t *tlssock = NULL; + int r; - REQUIRE(VALID_NMSOCK(tlslistensock)); REQUIRE(tlslistensock->type == 
isc_nm_tlslistener); /* If accept() was unsuccessful we can't do anything */ @@ -350,8 +350,10 @@ tlslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { return (ISC_R_TLSERROR); } - uv_timer_init(&tlssock->mgr->workers[isc_nm_tid()].loop, - &tlssock->timer); + r = uv_timer_init(&tlssock->mgr->workers[isc_nm_tid()].loop, + &tlssock->timer); + RUNTIME_CHECK(r == 0); + tlssock->timer.data = tlssock; tlssock->timer_initialized = true; tlssock->tls.ctx = tlslistensock->tls.ctx; @@ -410,7 +412,8 @@ isc__nm_async_tlssend(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_uvreq_t *req = ievent->req; ievent->req = NULL; REQUIRE(VALID_UVREQ(req)); - REQUIRE(worker->id == sock->tid); + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); if (inactive(sock)) { req->cb.send(req->handle, ISC_R_CANCELED, req->cbarg); @@ -449,7 +452,7 @@ isc__nm_async_tlssend(isc__networker_t *worker, isc__netievent_t *ev0) { void isc__nm_tls_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, void *cbarg) { - isc__netievent_tcpsend_t *ievent = NULL; + isc__netievent_tlssend_t *ievent = NULL; isc__nm_uvreq_t *uvreq = NULL; isc_nmsocket_t *sock = NULL; REQUIRE(VALID_NMHANDLE(handle)); @@ -475,60 +478,61 @@ isc__nm_tls_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, /* * We need to create an event and pass it using async channel */ - ievent = isc__nm_get_ievent(sock->mgr, netievent_tlssend); - ievent->sock = sock; - ievent->req = uvreq; + ievent = isc__nm_get_netievent_tlssend(sock->mgr, sock, uvreq); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); } void -isc__nm_async_tls_startread(isc__networker_t *worker, isc__netievent_t *ev0) { - isc__netievent_startread_t *ievent = (isc__netievent_startread_t *)ev0; +isc__nm_async_tlsstartread(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tlsstartread_t *ievent = + (isc__netievent_tlsstartread_t *)ev0; isc_nmsocket_t *sock = 
ievent->sock; - REQUIRE(worker->id == isc_nm_tid()); + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); tls_do_bio(sock); } void isc__nm_tls_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { - isc_nmsocket_t *sock = NULL; - isc__netievent_startread_t *ievent = NULL; - REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + REQUIRE(handle->sock->statichandle == handle); + REQUIRE(handle->sock->tid == isc_nm_tid()); - sock = handle->sock; - - REQUIRE(sock->statichandle == handle); - REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(sock->recv_cb == NULL); - REQUIRE(sock->tid == isc_nm_tid()); + isc__netievent_tlsstartread_t *ievent = NULL; + isc_nmsocket_t *sock = handle->sock; if (inactive(sock)) { cb(handle, ISC_R_NOTCONNECTED, NULL, cbarg); return; } - sock = handle->sock; sock->recv_cb = cb; sock->recv_cbarg = cbarg; - ievent = isc__nm_get_ievent(sock->mgr, netievent_tlsstartread); - ievent->sock = sock; - + ievent = isc__nm_get_netievent_tlsstartread(sock->mgr, sock); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); } void -isc__nm_tls_pauseread(isc_nmsocket_t *sock) { +isc__nm_tls_pauseread(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + isc_nmsocket_t *sock = handle->sock; + atomic_store(&sock->readpaused, true); } void -isc__nm_tls_resumeread(isc_nmsocket_t *sock) { +isc__nm_tls_resumeread(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + isc_nmsocket_t *sock = handle->sock; + atomic_store(&sock->readpaused, false); async_tls_do_bio(sock); } @@ -536,12 +540,12 @@ isc__nm_tls_resumeread(isc_nmsocket_t *sock) { static void timer_close_cb(uv_handle_t *handle) { isc_nmsocket_t *sock = (isc_nmsocket_t *)uv_handle_get_data(handle); - INSIST(VALID_NMSOCK(sock)); tls_close_direct(sock); } static void tls_close_direct(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == 
isc_nm_tid()); if (sock->timer_running) { @@ -602,9 +606,7 @@ isc__nm_tls_close(isc_nmsocket_t *sock) { tls_close_direct(sock); } else { isc__netievent_tlsclose_t *ievent = - isc__nm_get_ievent(sock->mgr, netievent_tlsclose); - - ievent->sock = sock; + isc__nm_get_netievent_tlsclose(sock->mgr, sock); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -614,7 +616,8 @@ void isc__nm_async_tlsclose(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_tlsclose_t *ievent = (isc__netievent_tlsclose_t *)ev0; - REQUIRE(worker->id == ievent->sock->tid); + REQUIRE(ievent->sock->tid == isc_nm_tid()); + UNUSED(worker); tls_close_direct(ievent->sock); } @@ -644,7 +647,7 @@ isc_result_t isc_nm_tlsconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, isc_nm_cb_t cb, void *cbarg, SSL_CTX *ctx, unsigned int timeout, size_t extrahandlesize) { - isc_nmsocket_t *nsock = NULL, *tmp = NULL; + isc_nmsocket_t *nsock = NULL; isc__netievent_tlsconnect_t *ievent = NULL; isc_result_t result = ISC_R_SUCCESS; @@ -653,7 +656,7 @@ isc_nm_tlsconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, nsock = isc_mem_get(mgr->mctx, sizeof(*nsock)); isc__nmsocket_init(nsock, mgr, isc_nm_tlssocket, local); nsock->extrahandlesize = extrahandlesize; - atomic_init(&nsock->result, ISC_R_SUCCESS); + nsock->result = ISC_R_SUCCESS; nsock->connect_cb = cb; nsock->connect_cbarg = cbarg; nsock->connect_timeout = timeout; @@ -667,31 +670,22 @@ isc_nm_tlsconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, return (ISC_R_TLSERROR); } - ievent = isc__nm_get_ievent(mgr, netievent_tlsconnect); - ievent->sock = nsock; + ievent = isc__nm_get_netievent_tlsconnect(mgr, nsock); ievent->local = local->addr; ievent->peer = peer->addr; ievent->ctx = ctx; - /* - * Async callbacks can dereference the socket in the meantime, - * we need to hold an additional reference to it. 
- */ - isc__nmsocket_attach(nsock, &tmp); - if (isc__nm_in_netthread()) { nsock->tid = isc_nm_tid(); isc__nm_async_tlsconnect(&mgr->workers[nsock->tid], (isc__netievent_t *)ievent); - isc__nm_put_ievent(mgr, ievent); + isc__nm_put_netievent_tlsconnect(mgr, ievent); } else { nsock->tid = isc_random_uniform(mgr->nworkers); isc__nm_enqueue_ievent(&mgr->workers[nsock->tid], (isc__netievent_t *)ievent); } - isc__nmsocket_detach(&tmp); - return (result); } @@ -703,8 +697,9 @@ tls_connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { if (result != ISC_R_SUCCESS) { tlssock->connect_cb(handle, result, tlssock->connect_cbarg); - atomic_store(&tlssock->result, result); - atomic_store(&tlssock->connect_error, true); + LOCK(&tlssock->parent->lock); + tlssock->parent->result = result; + UNLOCK(&tlssock->parent->lock); tls_close_direct(tlssock); return; } @@ -716,8 +711,9 @@ tls_connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { result = initialize_tls(tlssock, false); if (result != ISC_R_SUCCESS) { tlssock->connect_cb(handle, result, tlssock->connect_cbarg); - atomic_store(&tlssock->result, result); - atomic_store(&tlssock->connect_error, true); + LOCK(&tlssock->parent->lock); + tlssock->parent->result = result; + UNLOCK(&tlssock->parent->lock); tls_close_direct(tlssock); return; } @@ -728,12 +724,15 @@ isc__nm_async_tlsconnect(isc__networker_t *worker, isc__netievent_t *ev0) { (isc__netievent_tlsconnect_t *)ev0; isc_nmsocket_t *tlssock = ievent->sock; isc_result_t result; + int r; UNUSED(worker); tlssock->tid = isc_nm_tid(); - uv_timer_init(&tlssock->mgr->workers[isc_nm_tid()].loop, - &tlssock->timer); + r = uv_timer_init(&tlssock->mgr->workers[isc_nm_tid()].loop, + &tlssock->timer); + RUNTIME_CHECK(r == 0); + tlssock->timer.data = tlssock; tlssock->timer_initialized = true; tlssock->tls.state = TLS_INIT; @@ -745,15 +744,16 @@ isc__nm_async_tlsconnect(isc__networker_t *worker, isc__netievent_t *ev0) { if (result != ISC_R_SUCCESS) { /* FIXME: 
We need to pass valid handle */ tlssock->connect_cb(NULL, result, tlssock->connect_cbarg); - atomic_store(&tlssock->result, result); - atomic_store(&tlssock->connect_error, true); + LOCK(&tlssock->parent->lock); + tlssock->parent->result = result; + UNLOCK(&tlssock->parent->lock); tls_close_direct(tlssock); return; } } void -isc__nm_async_tls_do_bio(isc__networker_t *worker, isc__netievent_t *ev0) { +isc__nm_async_tlsdobio(isc__networker_t *worker, isc__netievent_t *ev0) { UNUSED(worker); isc__netievent_tlsdobio_t *ievent = (isc__netievent_tlsdobio_t *)ev0; tls_do_bio(ievent->sock); diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c new file mode 100644 index 0000000000..b56b5ea160 --- /dev/null +++ b/lib/isc/netmgr/tlsdns.c @@ -0,0 +1,984 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netmgr-int.h" +#include "uv-compat.h" + +#define TLSDNS_CLIENTS_PER_CONN 23 +/*%< + * + * Maximum number of simultaneous handles in flight supported for a single + * connected TLSDNS socket. This value was chosen arbitrarily, and may be + * changed in the future. 
+ */ + +static void +dnslisten_readcb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *arg); + +static void +resume_processing(void *arg); + +static void +tlsdns_close_direct(isc_nmsocket_t *sock); + +static inline size_t +dnslen(unsigned char *base) { + return ((base[0] << 8) + (base[1])); +} + +/* + * COMPAT CODE + */ + +static void * +isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type) { + isc__netievent_storage_t *event = isc_mempool_get(mgr->evpool); + + *event = (isc__netievent_storage_t){ .ni.type = type }; + return (event); +} + +/* + * Regular TCP buffer, should suffice in most cases. + */ +#define NM_REG_BUF 4096 +/* + * Two full DNS packets with lengths. + * netmgr receives 64k at most so there's no risk + * of overrun. + */ +#define NM_BIG_BUF (65535 + 2) * 2 +static inline void +alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) { + REQUIRE(len <= NM_BIG_BUF); + + if (sock->buf == NULL) { + /* We don't have the buffer at all */ + size_t alloc_len = len < NM_REG_BUF ? NM_REG_BUF : NM_BIG_BUF; + sock->buf = isc_mem_allocate(sock->mgr->mctx, alloc_len); + sock->buf_size = alloc_len; + } else { + /* We have the buffer but it's too small */ + sock->buf = isc_mem_reallocate(sock->mgr->mctx, sock->buf, + NM_BIG_BUF); + sock->buf_size = NM_BIG_BUF; + } +} + +static void +timer_close_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = (isc_nmsocket_t *)uv_handle_get_data(handle); + + REQUIRE(VALID_NMSOCK(sock)); + + atomic_store(&sock->closed, true); + tlsdns_close_direct(sock); +} + +static void +dnstcp_readtimeout(uv_timer_t *timer) { + isc_nmsocket_t *sock = + (isc_nmsocket_t *)uv_handle_get_data((uv_handle_t *)timer); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + + /* Close the TCP connection; its closure should fire ours. */ + if (sock->outerhandle != NULL) { + isc_nmhandle_detach(&sock->outerhandle); + } +} + +/* + * Accept callback for TCP-DNS connection. 
+ */ +static isc_result_t +dnslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + isc_nmsocket_t *dnslistensock = (isc_nmsocket_t *)cbarg; + isc_nmsocket_t *dnssock = NULL; + isc_nmhandle_t *readhandle = NULL; + isc_nm_accept_cb_t accept_cb; + void *accept_cbarg; + + REQUIRE(VALID_NMSOCK(dnslistensock)); + REQUIRE(dnslistensock->type == isc_nm_tlsdnslistener); + + if (result != ISC_R_SUCCESS) { + return (result); + } + + accept_cb = dnslistensock->accept_cb; + accept_cbarg = dnslistensock->accept_cbarg; + + if (accept_cb != NULL) { + result = accept_cb(handle, ISC_R_SUCCESS, accept_cbarg); + if (result != ISC_R_SUCCESS) { + return (result); + } + } + + /* We need to create a 'wrapper' dnssocket for this connection */ + dnssock = isc_mem_get(handle->sock->mgr->mctx, sizeof(*dnssock)); + isc__nmsocket_init(dnssock, handle->sock->mgr, isc_nm_tlsdnssocket, + handle->sock->iface); + + dnssock->extrahandlesize = dnslistensock->extrahandlesize; + isc__nmsocket_attach(dnslistensock, &dnssock->listener); + + isc__nmsocket_attach(dnssock, &dnssock->self); + + isc_nmhandle_attach(handle, &dnssock->outerhandle); + + dnssock->peer = handle->sock->peer; + dnssock->read_timeout = handle->sock->mgr->init; + dnssock->tid = isc_nm_tid(); + dnssock->closehandle_cb = resume_processing; + + uv_timer_init(&dnssock->mgr->workers[isc_nm_tid()].loop, + &dnssock->timer); + dnssock->timer.data = dnssock; + dnssock->timer_initialized = true; + uv_timer_start(&dnssock->timer, dnstcp_readtimeout, + dnssock->read_timeout, 0); + dnssock->timer_running = true; + + /* + * Add a reference to handle to keep it from being freed by + * the caller. It will be detached in dnslisten_readcb() when + * the connection is closed or there is no more data to be read. 
+ */ + isc_nmhandle_attach(handle, &readhandle); + isc_nm_read(readhandle, dnslisten_readcb, dnssock); + isc__nmsocket_detach(&dnssock); + + return (ISC_R_SUCCESS); +} + +/* + * Process a single packet from the incoming buffer. + * + * Return ISC_R_SUCCESS and attach 'handlep' to a handle if something + * was processed; return ISC_R_NOMORE if there isn't a full message + * to be processed. + * + * The caller will need to unreference the handle. + */ +static isc_result_t +processbuffer(isc_nmsocket_t *dnssock, isc_nmhandle_t **handlep) { + size_t len; + + REQUIRE(VALID_NMSOCK(dnssock)); + REQUIRE(dnssock->tid == isc_nm_tid()); + REQUIRE(handlep != NULL && *handlep == NULL); + + /* + * If we don't even have the length yet, we can't do + * anything. + */ + if (dnssock->buf_len < 2) { + return (ISC_R_NOMORE); + } + + /* + * Process the first packet from the buffer, leaving + * the rest (if any) for later. + */ + len = dnslen(dnssock->buf); + if (len <= dnssock->buf_len - 2) { + isc_nmhandle_t *dnshandle = NULL; + isc_nmsocket_t *listener = NULL; + isc_nm_recv_cb_t cb = NULL; + void *cbarg = NULL; + + if (atomic_load(&dnssock->client) && + dnssock->statichandle != NULL) { + isc_nmhandle_attach(dnssock->statichandle, &dnshandle); + } else { + dnshandle = isc__nmhandle_get(dnssock, NULL, NULL); + } + + listener = dnssock->listener; + if (listener != NULL) { + cb = listener->recv_cb; + cbarg = listener->recv_cbarg; + } else if (dnssock->recv_cb != NULL) { + cb = dnssock->recv_cb; + cbarg = dnssock->recv_cbarg; + /* + * We need to clear the read callback *before* + * calling it, because it might make another + * call to isc_nm_read() and set up a new callback. 
+ */ + isc__nmsocket_clearcb(dnssock); + } + + if (cb != NULL) { + cb(dnshandle, ISC_R_SUCCESS, + &(isc_region_t){ .base = dnssock->buf + 2, + .length = len }, + cbarg); + } + + len += 2; + dnssock->buf_len -= len; + if (len > 0) { + memmove(dnssock->buf, dnssock->buf + len, + dnssock->buf_len); + } + + *handlep = dnshandle; + return (ISC_R_SUCCESS); + } + + return (ISC_R_NOMORE); +} + +/* + * We've got a read on our underlying socket. Check whether + * we have a complete DNS packet and, if so, call the callback. + */ +static void +dnslisten_readcb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *arg) { + isc_nmsocket_t *dnssock = (isc_nmsocket_t *)arg; + unsigned char *base = NULL; + bool done = false; + size_t len; + + REQUIRE(VALID_NMSOCK(dnssock)); + REQUIRE(dnssock->tid == isc_nm_tid()); + REQUIRE(VALID_NMHANDLE(handle)); + + if (!isc__nmsocket_active(dnssock) || atomic_load(&dnssock->closing) || + dnssock->outerhandle == NULL || + (dnssock->listener != NULL && + !isc__nmsocket_active(dnssock->listener)) || + atomic_load(&dnssock->mgr->closing)) + { + if (eresult == ISC_R_SUCCESS) { + eresult = ISC_R_CANCELED; + } + } + + if (region == NULL || eresult != ISC_R_SUCCESS) { + isc_nm_recv_cb_t cb = dnssock->recv_cb; + void *cbarg = dnssock->recv_cbarg; + + /* Connection closed */ + dnssock->result = eresult; + isc__nmsocket_clearcb(dnssock); + if (atomic_load(&dnssock->client) && cb != NULL) { + cb(dnssock->statichandle, eresult, NULL, cbarg); + } + + if (dnssock->self != NULL) { + isc__nmsocket_detach(&dnssock->self); + } + if (dnssock->outerhandle != NULL) { + isc__nmsocket_clearcb(dnssock->outerhandle->sock); + isc_nmhandle_detach(&dnssock->outerhandle); + } + if (dnssock->listener != NULL) { + isc__nmsocket_detach(&dnssock->listener); + } + + /* + * Server connections will hold two handle references when + * shut down, but client (tlsdnsconnect) connections have + * only one. 
+ */ + if (!atomic_load(&dnssock->client)) { + isc_nmhandle_detach(&handle); + return; + } + + base = region->base; + len = region->length; + + if (dnssock->buf_len + len > dnssock->buf_size) { + alloc_dnsbuf(dnssock, dnssock->buf_len + len); + } + memmove(dnssock->buf + dnssock->buf_len, base, len); + dnssock->buf_len += len; + + dnssock->read_timeout = (atomic_load(&dnssock->keepalive) + ? dnssock->mgr->keepalive + : dnssock->mgr->idle); + + do { + isc_result_t result; + isc_nmhandle_t *dnshandle = NULL; + + result = processbuffer(dnssock, &dnshandle); + if (result != ISC_R_SUCCESS) { + /* + * There wasn't anything in the buffer to process. + */ + return; + } + + /* + * We have a packet: stop timeout timers + */ + if (dnssock->timer_initialized) { + uv_timer_stop(&dnssock->timer); + } + + if (atomic_load(&dnssock->sequential) || + dnssock->recv_cb == NULL) { + /* + * There are two reasons we might want to pause here: + * - We're in sequential mode and we've received + * a whole packet, so we're done until it's been + * processed; or + * - We no longer have a read callback. + */ + isc_nm_pauseread(dnssock->outerhandle); + done = true; + } else { + /* + * We're pipelining, so we now resume processing + * packets until the clients-per-connection limit + * is reached (as determined by the number of + * active handles on the socket). When the limit + * is reached, pause reading. + */ + if (atomic_load(&dnssock->ah) >= + TLSDNS_CLIENTS_PER_CONN) { + isc_nm_pauseread(dnssock->outerhandle); + done = true; + } + } + + isc_nmhandle_detach(&dnshandle); + } while (!done); +} + +/* + * isc_nm_listentlsdns works exactly as isc_nm_listentcpdns but on an SSL socket. 
+ */ +isc_result_t +isc_nm_listentlsdns(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, + void *cbarg, isc_nm_accept_cb_t accept_cb, + void *accept_cbarg, size_t extrahandlesize, int backlog, + isc_quota_t *quota, SSL_CTX *sslctx, + isc_nmsocket_t **sockp) { + isc_nmsocket_t *dnslistensock = isc_mem_get(mgr->mctx, + sizeof(*dnslistensock)); + isc_result_t result; + + REQUIRE(VALID_NM(mgr)); + REQUIRE(sslctx != NULL); + + isc__nmsocket_init(dnslistensock, mgr, isc_nm_tlsdnslistener, iface); + dnslistensock->recv_cb = cb; + dnslistensock->recv_cbarg = cbarg; + dnslistensock->accept_cb = accept_cb; + dnslistensock->accept_cbarg = accept_cbarg; + dnslistensock->extrahandlesize = extrahandlesize; + + result = isc_nm_listentls(mgr, iface, dnslisten_acceptcb, dnslistensock, + extrahandlesize, backlog, quota, sslctx, + &dnslistensock->outer); + if (result == ISC_R_SUCCESS) { + atomic_store(&dnslistensock->listening, true); + *sockp = dnslistensock; + return (ISC_R_SUCCESS); + } else { + atomic_store(&dnslistensock->closed, true); + isc__nmsocket_detach(&dnslistensock); + return (result); + } +} + +void +isc__nm_async_tlsdnsstop(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tlsdnsstop_t *ievent = + (isc__netievent_tlsdnsstop_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + + UNUSED(worker); + + REQUIRE(isc__nm_in_netthread()); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tlsdnslistener); + REQUIRE(sock->tid == isc_nm_tid()); + + atomic_store(&sock->listening, false); + atomic_store(&sock->closed, true); + + isc__nmsocket_clearcb(sock); + + if (sock->outer != NULL) { + isc__nm_tls_stoplistening(sock->outer); + isc__nmsocket_detach(&sock->outer); + } +} + +void +isc__nm_tlsdns_stoplistening(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tlsdnslistener); + + isc__netievent_tlsdnsstop_t *ievent = + isc__nm_get_ievent(sock->mgr, netievent_tlsdnsstop); + isc__nmsocket_attach(sock, 
&ievent->sock); + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); +} + +void +isc_nm_tlsdns_sequential(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + + if (handle->sock->type != isc_nm_tlsdnssocket || + handle->sock->outerhandle == NULL) + { + return; + } + + /* + * We don't want pipelining on this connection. That means + * that we need to pause after reading each request, and + * resume only after the request has been processed. This + * is done in resume_processing(), which is the socket's + * closehandle_cb callback, called whenever a handle + * is released. + */ + isc_nm_pauseread(handle->sock->outerhandle); + atomic_store(&handle->sock->sequential, true); +} + +void +isc_nm_tlsdns_keepalive(isc_nmhandle_t *handle, bool value) { + REQUIRE(VALID_NMHANDLE(handle)); + + if (handle->sock->type != isc_nm_tlsdnssocket || + handle->sock->outerhandle == NULL) + { + return; + } + + atomic_store(&handle->sock->keepalive, value); + atomic_store(&handle->sock->outerhandle->sock->keepalive, value); +} + +static void +resume_processing(void *arg) { + isc_nmsocket_t *sock = (isc_nmsocket_t *)arg; + isc_result_t result; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + + if (sock->type != isc_nm_tlsdnssocket || sock->outerhandle == NULL) { + return; + } + + if (atomic_load(&sock->ah) == 0) { + /* Nothing is active; sockets can timeout now */ + if (sock->timer_initialized) { + uv_timer_start(&sock->timer, dnstcp_readtimeout, + sock->read_timeout, 0); + sock->timer_running = true; + } + } + + /* + * For sequential sockets: Process what's in the buffer, or + * if there aren't any messages buffered, resume reading. 
+ */ + if (atomic_load(&sock->sequential)) { + isc_nmhandle_t *handle = NULL; + + result = processbuffer(sock, &handle); + if (result == ISC_R_SUCCESS) { + if (sock->timer_initialized) { + uv_timer_stop(&sock->timer); + } + isc_nmhandle_detach(&handle); + } else if (sock->outerhandle != NULL) { + isc_nm_resumeread(sock->outerhandle); + } + + return; + } + + /* + * For pipelined sockets: If we're under the clients-per-connection + * limit, resume processing until we reach the limit again. + */ + do { + isc_nmhandle_t *dnshandle = NULL; + + result = processbuffer(sock, &dnshandle); + if (result != ISC_R_SUCCESS) { + /* + * Nothing in the buffer; resume reading. + */ + if (sock->outerhandle != NULL) { + isc_nm_resumeread(sock->outerhandle); + } + + break; + } + + if (sock->timer_initialized) { + uv_timer_stop(&sock->timer); + } + isc_nmhandle_detach(&dnshandle); + } while (atomic_load(&sock->ah) < TLSDNS_CLIENTS_PER_CONN); +} + +static void +tlsdnssend_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + isc__nm_uvreq_t *req = (isc__nm_uvreq_t *)cbarg; + REQUIRE(VALID_UVREQ(req)); + + UNUSED(handle); + + req->cb.send(req->handle, result, req->cbarg); + isc_mem_put(req->sock->mgr->mctx, req->uvbuf.base, req->uvbuf.len); + isc__nm_uvreq_put(&req, req->handle->sock); + isc_nmhandle_detach(&handle); +} + +/* + * The socket is closing, outerhandle has been detached, listener is + * inactive, or the netmgr is closing: any operation on it should abort + * with ISC_R_CANCELED. 
+ */ +static bool +inactive(isc_nmsocket_t *sock) { + return (!isc__nmsocket_active(sock) || atomic_load(&sock->closing) || + sock->outerhandle == NULL || + (sock->listener != NULL && + !isc__nmsocket_active(sock->listener)) || + atomic_load(&sock->mgr->closing)); +} + +void +isc__nm_async_tlsdnssend(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tlsdnssend_t *ievent = + (isc__netievent_tlsdnssend_t *)ev0; + isc__nm_uvreq_t *req = ievent->req; + isc_nmsocket_t *sock = ievent->sock; + isc_nmhandle_t *sendhandle = NULL; + isc_region_t r; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(VALID_UVREQ(req)); + REQUIRE(worker->id == sock->tid); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->type == isc_nm_tlsdnssocket); + + if (inactive(sock)) { + req->cb.send(req->handle, ISC_R_CANCELED, req->cbarg); + isc_mem_put(sock->mgr->mctx, req->uvbuf.base, req->uvbuf.len); + isc__nm_uvreq_put(&req, req->handle->sock); + return; + } + + r.base = (unsigned char *)req->uvbuf.base; + r.length = req->uvbuf.len; + isc_nmhandle_attach(sock->outerhandle, &sendhandle); + isc_nm_send(sendhandle, &r, tlsdnssend_cb, req); +} + +/* + * isc__nm_tlsdns_send sends buf to a peer on a socket. 
+ */ +void +isc__nm_tlsdns_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg) { + isc__nm_uvreq_t *uvreq = NULL; + + REQUIRE(VALID_NMHANDLE(handle)); + + isc_nmsocket_t *sock = handle->sock; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tlsdnssocket); + + if (inactive(sock)) { + cb(handle, ISC_R_CANCELED, cbarg); + return; + } + + uvreq = isc__nm_uvreq_get(sock->mgr, sock); + isc_nmhandle_attach(handle, &uvreq->handle); + uvreq->cb.send = cb; + uvreq->cbarg = cbarg; + + uvreq->uvbuf.base = isc_mem_get(sock->mgr->mctx, region->length + 2); + uvreq->uvbuf.len = region->length + 2; + *(uint16_t *)uvreq->uvbuf.base = htons(region->length); + memmove(uvreq->uvbuf.base + 2, region->base, region->length); + + isc__netievent_tlsdnssend_t *ievent = NULL; + + ievent = isc__nm_get_ievent(sock->mgr, netievent_tlsdnssend); + ievent->req = uvreq; + isc__nmsocket_attach(sock, &ievent->sock); + + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); +} + +static void +tlsdns_close_direct(isc_nmsocket_t *sock) { + REQUIRE(sock->tid == isc_nm_tid()); + + if (sock->timer_running) { + uv_timer_stop(&sock->timer); + sock->timer_running = false; + } + + /* We don't need atomics here, it's all in single network thread */ + if (sock->self != NULL) { + isc__nmsocket_detach(&sock->self); + } else if (sock->timer_initialized) { + /* + * We need to fire the timer callback to clean it up, + * it will then call us again (via detach) so that we + * can finally close the socket. + */ + sock->timer_initialized = false; + uv_timer_stop(&sock->timer); + uv_close((uv_handle_t *)&sock->timer, timer_close_cb); + } else { + /* + * At this point we're certain that there are no external + * references, we can close everything. 
+ */ + if (sock->outerhandle != NULL) { + isc__nmsocket_clearcb(sock->outerhandle->sock); + isc_nmhandle_detach(&sock->outerhandle); + } + if (sock->listener != NULL) { + isc__nmsocket_detach(&sock->listener); + } + atomic_store(&sock->closed, true); + isc__nmsocket_prep_destroy(sock); + } +} + +void +isc__nm_tlsdns_close(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tlsdnssocket); + + if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false }, + true)) { + return; + } + + if (sock->tid == isc_nm_tid()) { + tlsdns_close_direct(sock); + } else { + isc__netievent_tlsdnsclose_t *ievent = + isc__nm_get_ievent(sock->mgr, netievent_tlsdnsclose); + + isc__nmsocket_attach(sock, &ievent->sock); + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); + } +} + +void +isc__nm_async_tlsdnsclose(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tlsdnsclose_t *ievent = + (isc__netievent_tlsdnsclose_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + + UNUSED(worker); + + tlsdns_close_direct(ievent->sock); +} + +typedef struct { + isc_mem_t *mctx; + isc_nm_cb_t cb; + void *cbarg; + size_t extrahandlesize; +} tcpconnect_t; + +static void +tlsdnsconnect_cb(isc_nmhandle_t *handle, isc_result_t result, void *arg) { + tcpconnect_t *conn = (tcpconnect_t *)arg; + isc_nm_cb_t cb = conn->cb; + void *cbarg = conn->cbarg; + size_t extrahandlesize = conn->extrahandlesize; + isc_nmsocket_t *dnssock = NULL; + isc_nmhandle_t *readhandle = NULL; + + REQUIRE(result != ISC_R_SUCCESS || VALID_NMHANDLE(handle)); + + isc_mem_putanddetach(&conn->mctx, conn, sizeof(*conn)); + + dnssock = isc_mem_get(handle->sock->mgr->mctx, sizeof(*dnssock)); + isc__nmsocket_init(dnssock, handle->sock->mgr, isc_nm_tlsdnssocket, + handle->sock->iface); + + dnssock->extrahandlesize = extrahandlesize; + isc_nmhandle_attach(handle, &dnssock->outerhandle); + 
+ dnssock->peer = handle->sock->peer; + dnssock->read_timeout = handle->sock->mgr->init; + dnssock->tid = isc_nm_tid(); + + atomic_init(&dnssock->client, true); + + readhandle = isc__nmhandle_get(dnssock, NULL, NULL); + + if (result != ISC_R_SUCCESS) { + cb(readhandle, result, cbarg); + isc__nmsocket_detach(&dnssock); + isc_nmhandle_detach(&readhandle); + return; + } + + INSIST(dnssock->statichandle != NULL); + INSIST(dnssock->statichandle == readhandle); + INSIST(readhandle->sock == dnssock); + INSIST(dnssock->recv_cb == NULL); + + uv_timer_init(&dnssock->mgr->workers[isc_nm_tid()].loop, + &dnssock->timer); + dnssock->timer.data = dnssock; + dnssock->timer_initialized = true; + uv_timer_start(&dnssock->timer, dnstcp_readtimeout, + dnssock->read_timeout, 0); + dnssock->timer_running = true; + + /* + * The connection is now established; we start reading immediately, + * before we've been asked to. We'll read and buffer at most one + * packet. + */ + isc_nm_read(handle, dnslisten_readcb, dnssock); + cb(readhandle, ISC_R_SUCCESS, cbarg); + + /* + * The sock is now attached to the handle. 
+ */ + isc__nmsocket_detach(&dnssock); +} + +isc_result_t +isc_nm_tlsdnsconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, + isc_nm_cb_t cb, void *cbarg, unsigned int timeout, + size_t extrahandlesize) { + isc_result_t result = ISC_R_SUCCESS; + tcpconnect_t *conn = isc_mem_get(mgr->mctx, sizeof(tcpconnect_t)); + SSL_CTX *ctx = NULL; + + *conn = (tcpconnect_t){ .cb = cb, + .cbarg = cbarg, + .extrahandlesize = extrahandlesize }; + isc_mem_attach(mgr->mctx, &conn->mctx); + + ctx = SSL_CTX_new(SSLv23_client_method()); + result = isc_nm_tlsconnect(mgr, local, peer, tlsdnsconnect_cb, conn, + ctx, timeout, 0); + SSL_CTX_free(ctx); + if (result != ISC_R_SUCCESS) { + isc_mem_putanddetach(&conn->mctx, conn, sizeof(*conn)); + } + return (result); +} + +void +isc__nm_tlsdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { + isc_nmsocket_t *sock = NULL; + isc__netievent_tlsdnsread_t *ievent = NULL; + isc_nmhandle_t *eventhandle = NULL; + + REQUIRE(VALID_NMHANDLE(handle)); + + sock = handle->sock; + + REQUIRE(sock->statichandle == handle); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->recv_cb == NULL); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->client)); + + if (inactive(sock)) { + cb(handle, ISC_R_NOTCONNECTED, NULL, cbarg); + return; + } + + /* + * This MUST be done asynchronously, no matter which thread we're + * in. The callback function for isc_nm_read() often calls + * isc_nm_read() again; if we tried to do that synchronously + * we'd clash in processbuffer() and grow the stack indefinitely. + */ + ievent = isc__nm_get_ievent(sock->mgr, netievent_tlsdnsread); + isc__nmsocket_attach(sock, &ievent->sock); + + sock->recv_cb = cb; + sock->recv_cbarg = cbarg; + + sock->read_timeout = (atomic_load(&sock->keepalive) + ? sock->mgr->keepalive + : sock->mgr->idle); + + /* + * Add a reference to the handle to keep it from being freed by + * the caller; it will be detached in isc__nm_async_tlsdnsread(). 
+ */ + isc_nmhandle_attach(handle, &eventhandle); + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); +} + +void +isc__nm_async_tlsdnsread(isc__networker_t *worker, isc__netievent_t *ev0) { + isc_result_t result; + isc__netievent_tlsdnsread_t *ievent = + (isc__netievent_tlsdnsclose_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + isc_nmhandle_t *handle = NULL, *newhandle = NULL; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(worker->id == sock->tid); + REQUIRE(sock->tid == isc_nm_tid()); + + handle = sock->statichandle; + + if (inactive(sock)) { + isc_nm_recv_cb_t cb = sock->recv_cb; + void *cbarg = sock->recv_cbarg; + + isc__nmsocket_clearcb(sock); + if (cb != NULL) { + cb(handle, ISC_R_NOTCONNECTED, NULL, cbarg); + } + + isc_nmhandle_detach(&handle); + return; + } + + /* + * Maybe we have a packet already? + */ + result = processbuffer(sock, &newhandle); + if (result == ISC_R_SUCCESS) { + if (sock->timer_initialized) { + uv_timer_stop(&sock->timer); + } + isc_nmhandle_detach(&newhandle); + } else if (sock->outerhandle != NULL) { + /* Restart reading, wait for the callback */ + if (sock->timer_initialized) { + uv_timer_start(&sock->timer, dnstcp_readtimeout, + sock->read_timeout, 0); + sock->timer_running = true; + } + isc_nm_resumeread(sock->outerhandle); + } else { + isc_nm_recv_cb_t cb = sock->recv_cb; + void *cbarg = sock->recv_cbarg; + + isc__nmsocket_clearcb(sock); + cb(handle, ISC_R_NOTCONNECTED, NULL, cbarg); + } + + isc_nmhandle_detach(&handle); +} + +void +isc__nm_tlsdns_cancelread(isc_nmhandle_t *handle) { + isc_nmsocket_t *sock = NULL; + isc__netievent_tlsdnscancel_t *ievent = NULL; + + REQUIRE(VALID_NMHANDLE(handle)); + + sock = handle->sock; + + REQUIRE(sock->type == isc_nm_tlsdnssocket); + + ievent = isc__nm_get_ievent(sock->mgr, netievent_tlsdnscancel); + isc__nmsocket_attach(sock, &ievent->sock); + isc_nmhandle_attach(handle, &ievent->handle); + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + 
(isc__netievent_t *)ievent); +} + +void +isc__nm_async_tlsdnscancel(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tlsdnscancel_t *ievent = + (isc__netievent_tlsdnscancel_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + isc_nmhandle_t *handle = ievent->handle; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(worker->id == sock->tid); + REQUIRE(sock->tid == isc_nm_tid()); + + if (atomic_load(&sock->client)) { + isc_nm_recv_cb_t cb; + void *cbarg = NULL; + + cb = sock->recv_cb; + cbarg = sock->recv_cbarg; + isc__nmsocket_clearcb(sock); + + if (cb != NULL) { + cb(handle, ISC_R_EOF, NULL, cbarg); + } + + isc__nm_tcp_cancelread(sock->outerhandle); + } +} + +void +isc__nm_tlsdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout) { + isc_nmsocket_t *sock = NULL; + + REQUIRE(VALID_NMHANDLE(handle)); + + sock = handle->sock; + + if (sock->outerhandle != NULL) { + isc__nm_tcp_settimeout(sock->outerhandle, timeout); + } + + sock->read_timeout = timeout; + if (sock->timer_running) { + uv_timer_start(&sock->timer, dnstcp_readtimeout, + sock->read_timeout, 0); + } +} diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index ecaf66046a..22055c3382 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -42,7 +42,10 @@ static void udp_send_cb(uv_udp_send_t *req, int status); static void -udp_close_cb(uv_handle_t *uvhandle); +udp_close_cb(uv_handle_t *handle); + +static void +timer_close_cb(uv_handle_t *handle); static void udp_close_direct(isc_nmsocket_t *sock); @@ -54,6 +57,19 @@ static void failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, isc_result_t eresult); +static void +stop_udp_parent(isc_nmsocket_t *sock); +static void +stop_udp_child(isc_nmsocket_t *sock); + +static void +start_reading(isc_nmsocket_t *sock); +static void +stop_reading(isc_nmsocket_t *sock); + +static isc__nm_uvreq_t * +get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr); + static bool inactive(isc_nmsocket_t *sock) { return (!isc__nmsocket_active(sock) || 
@@ -61,10 +77,36 @@ inactive(isc_nmsocket_t *sock) { (sock->server != NULL && !isc__nmsocket_active(sock->server))); } +static isc_result_t +isc__nm_udp_lb_socket(sa_family_t sa_family, uv_os_sock_t *sockp) { + isc_result_t result; + uv_os_sock_t sock; + + result = isc__nm_socket(sa_family, SOCK_DGRAM, 0, &sock); + REQUIRE(result == ISC_R_SUCCESS); + + (void)isc__nm_socket_incoming_cpu(sock); + + (void)isc__nm_socket_dontfrag(sock, sa_family); + + result = isc__nm_socket_reuse(sock); + REQUIRE(result == ISC_R_SUCCESS); + + result = isc__nm_socket_reuse_lb(sock); + REQUIRE(result == ISC_R_SUCCESS || result == ISC_R_NOTIMPLEMENTED); + + *sockp = sock; + + return (result); +} + isc_result_t isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, void *cbarg, size_t extrahandlesize, isc_nmsocket_t **sockp) { - isc_nmsocket_t *nsock = NULL; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *sock = NULL; + sa_family_t sa_family = iface->addr.type.sa.sa_family; + size_t children_size = 0; REQUIRE(VALID_NM(mgr)); @@ -72,61 +114,63 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, * We are creating mgr->nworkers duplicated sockets, one * socket for each worker thread. 
*/ - nsock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t)); - isc__nmsocket_init(nsock, mgr, isc_nm_udplistener, iface); - nsock->nchildren = mgr->nworkers; - atomic_init(&nsock->rchildren, mgr->nworkers); - nsock->children = isc_mem_get(mgr->mctx, - mgr->nworkers * sizeof(*nsock)); - memset(nsock->children, 0, mgr->nworkers * sizeof(*nsock)); + sock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t)); + isc__nmsocket_init(sock, mgr, isc_nm_udplistener, iface); - INSIST(nsock->recv_cb == NULL && nsock->recv_cbarg == NULL); - nsock->recv_cb = cb; - nsock->recv_cbarg = cbarg; - nsock->extrahandlesize = extrahandlesize; + sock->rchildren = 0; + sock->nchildren = mgr->nworkers; + children_size = sock->nchildren * sizeof(sock->children[0]); + sock->children = isc_mem_get(mgr->mctx, children_size); + memset(sock->children, 0, children_size); + + sock->recv_cb = cb; + sock->recv_cbarg = cbarg; + sock->extrahandlesize = extrahandlesize; + sock->result = ISC_R_DEFAULT; + sock->tid = isc_random_uniform(mgr->nworkers); + sock->fd = -1; for (size_t i = 0; i < mgr->nworkers; i++) { - isc_result_t result; - sa_family_t sa_family = iface->addr.type.sa.sa_family; - isc__netievent_udplisten_t *ievent = NULL; - isc_nmsocket_t *csock = &nsock->children[i]; + isc_nmsocket_t *csock = &sock->children[i]; isc__nmsocket_init(csock, mgr, isc_nm_udpsocket, iface); - csock->parent = nsock; - csock->tid = i; - csock->extrahandlesize = extrahandlesize; - - INSIST(csock->recv_cb == NULL && csock->recv_cbarg == NULL); + csock->parent = sock; + csock->iface = sock->iface; + csock->reading = true; csock->recv_cb = cb; csock->recv_cbarg = cbarg; - result = isc__nm_socket(sa_family, SOCK_DGRAM, 0, &csock->fd); - RUNTIME_CHECK(result == ISC_R_SUCCESS); + csock->extrahandlesize = sock->extrahandlesize; + csock->tid = i; - result = isc__nm_socket_reuse(csock->fd); - RUNTIME_CHECK(result == ISC_R_SUCCESS || - result == ISC_R_NOTIMPLEMENTED); + (void)isc__nm_udp_lb_socket(sa_family, &csock->fd); + 
REQUIRE(csock->fd >= 0); - result = isc__nm_socket_reuse_lb(csock->fd); - RUNTIME_CHECK(result == ISC_R_SUCCESS || - result == ISC_R_NOTIMPLEMENTED); - - /* We don't check for the result, because SO_INCOMING_CPU can be - * available without the setter on Linux kernel version 4.4, and - * setting SO_INCOMING_CPU is just an optimization. - */ - (void)isc__nm_socket_incoming_cpu(csock->fd); - - (void)isc__nm_socket_dontfrag(csock->fd, sa_family); - - ievent = isc__nm_get_ievent(mgr, netievent_udplisten); - ievent->sock = csock; + ievent = isc__nm_get_netievent_udplisten(mgr, csock); isc__nm_enqueue_ievent(&mgr->workers[i], (isc__netievent_t *)ievent); } - *sockp = nsock; - return (ISC_R_SUCCESS); + LOCK(&sock->lock); + while (sock->rchildren != mgr->nworkers) { + WAIT(&sock->cond, &sock->lock); + } + result = sock->result; + atomic_store(&sock->active, true); + BROADCAST(&sock->scond); + UNLOCK(&sock->lock); + INSIST(result != ISC_R_DEFAULT); + + if (result == ISC_R_SUCCESS) { + REQUIRE(sock->rchildren == mgr->nworkers); + *sockp = sock; + } else { + atomic_store(&sock->active, false); + isc__nm_udp_stoplistening(sock); + isc_nmsocket_close(&sock); + } + + return (result); } /*%< @@ -161,11 +205,20 @@ udp_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) { void isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_udplisten_t *ievent = (isc__netievent_udplisten_t *)ev0; - isc_nmsocket_t *sock = ievent->sock; + isc_nmiface_t *iface; + isc_nmsocket_t *sock = NULL; int r, uv_bind_flags = 0; int uv_init_flags = 0; sa_family_t sa_family; + REQUIRE(VALID_NMSOCK(ievent->sock)); + REQUIRE(ievent->sock->tid == isc_nm_tid()); + REQUIRE(VALID_NMSOCK(ievent->sock->parent)); + + sock = ievent->sock; + iface = sock->iface; + sa_family = iface->addr.type.sa.sa_family; + REQUIRE(sock->type == isc_nm_udpsocket); REQUIRE(sock->iface != NULL); REQUIRE(sock->parent != NULL); @@ -174,41 +227,34 @@ isc__nm_async_udplisten(isc__networker_t 
*worker, isc__netievent_t *ev0) { #ifdef UV_UDP_RECVMMSG uv_init_flags |= UV_UDP_RECVMMSG; #endif - uv_udp_init_ex(&worker->loop, &sock->uv_handle.udp, uv_init_flags); - uv_handle_set_data(&sock->uv_handle.handle, NULL); - isc__nmsocket_attach(sock, - (isc_nmsocket_t **)&sock->uv_handle.udp.data); + r = uv_udp_init_ex(&worker->loop, &sock->uv_handle.udp, uv_init_flags); + RUNTIME_CHECK(r == 0); + uv_handle_set_data(&sock->uv_handle.handle, sock); + /* This keeps the socket alive after everything else is gone */ + isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL }); + + r = uv_timer_init(&worker->loop, &sock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&sock->timer, sock); r = uv_udp_open(&sock->uv_handle.udp, sock->fd); - if (r == 0) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]); - } else { + if (r < 0) { + isc__nm_closesocket(sock->fd); isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]); + goto failure; } + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]); - sa_family = sock->iface->addr.type.sa.sa_family; if (sa_family == AF_INET6) { uv_bind_flags |= UV_UDP_IPV6ONLY; } - r = uv_udp_bind(&sock->uv_handle.udp, - &sock->parent->iface->addr.type.sa, uv_bind_flags); - if (r == UV_EADDRNOTAVAIL && - isc__nm_socket_freebind(sock->fd, sa_family) == ISC_R_SUCCESS) - { - /* - * Retry binding with IP_FREEBIND (or equivalent option) if the - * address is not available. This helps with IPv6 tentative - * addresses which are reported by the route socket, although - * named is not yet able to properly bind to them. 
- */ - r = uv_udp_bind(&sock->uv_handle.udp, + r = isc_uv_udp_freebind(&sock->uv_handle.udp, &sock->parent->iface->addr.type.sa, uv_bind_flags); - } - - if (r < 0) { + if (r < 0 && r != UV_EINVAL) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]); + goto failure; } #ifdef ISC_RECV_BUFFER_SIZE uv_recv_buffer_size(&sock->uv_handle.handle, @@ -218,88 +264,61 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { uv_send_buffer_size(&sock->uv_handle.handle, &(int){ ISC_SEND_BUFFER_SIZE }); #endif - uv_udp_recv_start(&sock->uv_handle.udp, udp_alloc_cb, udp_recv_cb); -} - -static void -udp_stop_cb(uv_handle_t *handle) { - isc_nmsocket_t *sock = uv_handle_get_data(handle); - - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]); - atomic_store(&sock->closed, true); - isc__nmsocket_detach((isc_nmsocket_t **)&sock->uv_handle.udp.data); -} - -static void -stop_udp_child(isc_nmsocket_t *sock) { - REQUIRE(sock->type == isc_nm_udpsocket); - REQUIRE(sock->tid == isc_nm_tid()); - - uv_udp_recv_stop(&sock->uv_handle.udp); - - if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false }, - true)) { - return; + r = uv_udp_recv_start(&sock->uv_handle.udp, udp_alloc_cb, udp_recv_cb); + if (r != 0) { + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]); + goto failure; } - uv_close(&sock->uv_handle.handle, udp_stop_cb); + atomic_store(&sock->listening, true); LOCK(&sock->parent->lock); - atomic_fetch_sub(&sock->parent->rchildren, 1); + sock->parent->rchildren += 1; + if (sock->parent->result == ISC_R_DEFAULT) { + sock->parent->result = ISC_R_SUCCESS; + } + SIGNAL(&sock->parent->cond); + if (!atomic_load(&sock->parent->active)) { + WAIT(&sock->parent->scond, &sock->parent->lock); + } + INSIST(atomic_load(&sock->parent->active)); + UNLOCK(&sock->parent->lock); + + return; +failure: + LOCK(&sock->parent->lock); + sock->parent->rchildren += 1; + if (sock->parent->result == ISC_R_DEFAULT) { + sock->parent->result = 
isc__nm_uverr2result(r); + } + SIGNAL(&sock->parent->cond); + if (!atomic_load(&sock->parent->active)) { + WAIT(&sock->parent->scond, &sock->parent->lock); + } + INSIST(atomic_load(&sock->parent->active)); UNLOCK(&sock->parent->lock); - BROADCAST(&sock->parent->cond); } static void -stoplistening(isc_nmsocket_t *sock) { - REQUIRE(sock->type == isc_nm_udplistener); - - for (int i = 0; i < sock->nchildren; i++) { - isc__netievent_udpstop_t *event = NULL; - - if (isc_nm_tid() == sock->children[i].tid) { - stop_udp_child(&sock->children[i]); - continue; - } - - event = isc__nm_get_ievent(sock->mgr, netievent_udpstop); - event->sock = &sock->children[i]; - isc__nm_enqueue_ievent(&sock->mgr->workers[i], - (isc__netievent_t *)event); - } - - LOCK(&sock->lock); - while (atomic_load_relaxed(&sock->rchildren) > 0) { - WAIT(&sock->cond, &sock->lock); - } - atomic_store(&sock->closed, true); - UNLOCK(&sock->lock); - - isc__nmsocket_prep_destroy(sock); +enqueue_stoplistening(isc_nmsocket_t *sock) { + isc__netievent_udpstop_t *ievent = + isc__nm_get_netievent_udpstop(sock->mgr, sock); + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); } void isc__nm_udp_stoplistening(isc_nmsocket_t *sock) { - isc__netievent_udpstop_t *ievent = NULL; - - /* We can't be launched from network thread, we'd deadlock */ - REQUIRE(!isc__nm_in_netthread()); REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->type == isc_nm_udplistener); - /* - * If the manager is interlocked, re-enqueue this as an asynchronous - * event. Otherwise, go ahead and stop listening right away. 
- */ - if (!isc__nm_acquire_interlocked(sock->mgr)) { - ievent = isc__nm_get_ievent(sock->mgr, netievent_udpstop); - ievent->sock = sock; - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - } else { - stoplistening(sock); - isc__nm_drop_interlocked(sock->mgr); + if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false }, + true)) { + INSIST(0); + ISC_UNREACHABLE(); } + + enqueue_stoplistening(sock); } /* @@ -310,12 +329,11 @@ isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_udpstop_t *ievent = (isc__netievent_udpstop_t *)ev0; isc_nmsocket_t *sock = ievent->sock; - REQUIRE(sock->iface != NULL); UNUSED(worker); - /* - * If this is a child socket, stop listening and return. - */ + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + if (sock->parent != NULL) { stop_udp_child(sock); return; @@ -325,35 +343,31 @@ isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0) { * If network manager is paused, re-enqueue the event for later. */ if (!isc__nm_acquire_interlocked(sock->mgr)) { - isc__netievent_udplisten_t *event = NULL; - - event = isc__nm_get_ievent(sock->mgr, netievent_udpstop); - event->sock = sock; - isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)event); + enqueue_stoplistening(sock); } else { - stoplistening(sock); + stop_udp_parent(sock); isc__nm_drop_interlocked(sock->mgr); } } /* * udp_recv_cb handles incoming UDP packet from uv. The buffer here is - * reused for a series of packets, so we need to allocate a new one. This - * new one can be reused to send the response then. + * reused for a series of packets, so we need to allocate a new one. + * This new one can be reused to send the response then. 
*/ static void udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, const struct sockaddr *addr, unsigned flags) { - isc_result_t result; - isc_sockaddr_t sockaddr; isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle); - isc__nm_uvreq_t *req = NULL; + isc__nm_uvreq_t *req; uint32_t maxudp; bool free_buf; + isc_sockaddr_t sockaddr; + isc_result_t result; REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->reading); #ifdef UV_UDP_MMSG_FREE free_buf = ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE); @@ -366,12 +380,7 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, /* * Three possible reasons to return now without processing: - * - If addr == NULL, in which case it's the end of stream; - * we can free the buffer and bail. */ - if (addr == NULL) { - goto done; - } /* * - If we're simulating a firewall blocking UDP packets @@ -379,47 +388,56 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, */ maxudp = atomic_load(&sock->mgr->maxudp); if ((maxudp != 0 && (uint32_t)nrecv > maxudp)) { - goto done; + /* + * We need to keep the read_cb intact in case, so the + * readtimeout_cb can trigger and not crash because of + * missing read_req. + */ + goto free; + } + + /* + * - If addr == NULL, in which case it's the end of stream; + * we can free the buffer and bail. + */ + if (addr == NULL) { + failed_read_cb(sock, ISC_R_EOF); + goto free; } /* * - If the socket is no longer active. 
*/ if (!isc__nmsocket_active(sock)) { - goto done; + failed_read_cb(sock, ISC_R_CANCELED); + goto free; } - if (sock->timer_running) { - uv_timer_stop(&sock->timer); - sock->timer_running = false; + if (nrecv < 0) { + failed_read_cb(sock, isc__nm_uverr2result(nrecv)); + goto free; } - req = isc__nm_uvreq_get(sock->mgr, sock); - req->cb.recv = sock->recv_cb; - req->cbarg = sock->recv_cbarg; + result = isc_sockaddr_fromsockaddr(&sockaddr, addr); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + + req = get_read_req(sock, &sockaddr); + /* * The callback will be called synchronously, because result is - * ISC_R_SUCCESS. + * ISC_R_SUCCESS, so we are ok of passing the buf directly. */ req->uvbuf.base = buf->base; req->uvbuf.len = nrecv; - if (atomic_load(&sock->client)) { - if (nrecv < 0) { - failed_read_cb(sock, isc__nm_uverr2result(nrecv)); - return; - } + sock->recv_read = false; - isc_nmhandle_attach(sock->statichandle, &req->handle); - } else { - result = isc_sockaddr_fromsockaddr(&sockaddr, addr); - RUNTIME_CHECK(result == ISC_R_SUCCESS); - - req->handle = isc__nmhandle_get(sock, &sockaddr, NULL); - } + REQUIRE(!sock->processing); + sock->processing = true; isc__nm_readcb(sock, req, ISC_R_SUCCESS); + sock->processing = false; -done: +free: if (free_buf) { isc__nm_free_uvbuf(sock, buf); } @@ -427,8 +445,8 @@ done: /* * Send the data in 'region' to a peer via a UDP socket. We try to find - * a proper sibling/child socket so that we won't have to jump to another - * thread. + * a proper sibling/child socket so that we won't have to jump to + * another thread. 
*/ void isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, @@ -450,12 +468,6 @@ isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, uvreq->cb.send = cb; uvreq->cbarg = cbarg; - if (inactive(sock)) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]); - failed_send_cb(sock, uvreq, ISC_R_CANCELED); - return; - } - /* * We're simulating a firewall blocking UDP packets bigger than * 'maxudp' bytes, for testing purposes. @@ -466,7 +478,7 @@ isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, */ if (maxudp != 0 && region->length > maxudp) { isc__nm_uvreq_put(&uvreq, sock); - isc_nmhandle_detach(&handle); + isc_nmhandle_detach(&handle); /* FIXME? */ return; } @@ -482,8 +494,9 @@ isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, /* * If we're in the network thread, we can send directly. If the - * handle is associated with a UDP socket, we can reuse its thread - * (assuming CPU affinity). Otherwise, pick a thread at random. + * handle is associated with a UDP socket, we can reuse its + * thread (assuming CPU affinity). Otherwise, pick a thread at + * random. 
*/ if (isc__nm_in_netthread()) { ntid = isc_nm_tid(); @@ -512,10 +525,10 @@ isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, } } else { /* - * We need to create an event and pass it using async channel + * We need to create an event and pass it using async + * channel */ - ievent = isc__nm_get_ievent(sock->mgr, netievent_udpsend); - ievent->sock = rsock; + ievent = isc__nm_get_netievent_udpsend(sock->mgr, rsock); ievent->peer = *peer; ievent->req = uvreq; @@ -535,7 +548,8 @@ isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_uvreq_t *uvreq = ievent->req; REQUIRE(sock->type == isc_nm_udpsocket); - REQUIRE(worker->id == sock->tid); + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); if (!isc__nmsocket_active(ievent->sock)) { failed_send_cb(sock, uvreq, ISC_R_CANCELED); @@ -596,6 +610,7 @@ udp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, sa = NULL; } #endif + r = uv_udp_send(&req->uv_req.udp_send, &sock->uv_handle.udp, &req->uvbuf, 1, sa, udp_send_cb); if (r < 0) { @@ -619,24 +634,18 @@ udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { atomic_store(&sock->connecting, true); r = uv_udp_init(&worker->loop, &sock->uv_handle.udp); - if (r != 0) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]); - /* Socket was never opened; no need for isc__nm_udp_close() */ - atomic_store(&sock->closing, true); - atomic_store(&sock->closed, true); - atomic_store(&sock->connect_error, true); - atomic_store(&sock->active, false); - return (isc__nm_uverr2result(r)); - } + RUNTIME_CHECK(r == 0); + uv_handle_set_data(&sock->uv_handle.handle, sock); + + r = uv_timer_init(&worker->loop, &sock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&sock->timer, sock); r = uv_udp_open(&sock->uv_handle.udp, sock->fd); if (r != 0) { + isc__nm_closesocket(sock->fd); isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]); - atomic_store(&sock->connect_error, true); - 
atomic_store(&sock->result, isc__nm_uverr2result(r)); - atomic_store(&sock->active, false); - isc__nm_udp_close(sock); - return (isc__nm_uverr2result(r)); + goto failure; } isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]); @@ -648,24 +657,14 @@ udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { uv_bind_flags); if (r != 0) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]); - atomic_store(&sock->connect_error, true); - atomic_store(&sock->result, isc__nm_uverr2result(r)); - atomic_store(&sock->active, false); - isc__nm_udp_close(sock); - return (isc__nm_uverr2result(r)); + goto failure; } - uv_handle_set_data(&sock->uv_handle.handle, sock); - r = isc_uv_udp_connect(&sock->uv_handle.udp, &req->peer.type.sa); if (r != 0) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECTFAIL]); - atomic_store(&sock->connect_error, true); - atomic_store(&sock->result, isc__nm_uverr2result(r)); - atomic_store(&sock->active, false); - isc__nm_udp_close(sock); - return (isc__nm_uverr2result(r)); + goto failure; } isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]); atomic_store(&sock->connecting, false); @@ -678,12 +677,22 @@ udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { uv_send_buffer_size(&sock->uv_handle.handle, &(int){ ISC_SEND_BUFFER_SIZE }); #endif + + atomic_store(&sock->connected, true); + return (ISC_R_SUCCESS); + +failure: + atomic_store(&sock->active, false); + + isc__nm_udp_close(sock); + + return (isc__nm_uverr2result(r)); } /* - * Asynchronous 'udpconnect' call handler: open a new UDP socket and call - * the 'open' callback with a handle. + * Asynchronous 'udpconnect' call handler: open a new UDP socket and + * call the 'open' callback with a handle. 
*/ void isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0) { @@ -703,19 +712,27 @@ isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0) { req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface->addr); result = udp_connect_direct(sock, req); - atomic_store(&sock->result, result); - if (result == ISC_R_SUCCESS) { - atomic_store(&sock->connected, true); - isc__nm_connectcb(sock, req, ISC_R_SUCCESS); - } else { - atomic_store(&sock->connect_error, true); + if (result != ISC_R_SUCCESS) { isc__nm_uvreq_put(&req, sock); } LOCK(&sock->lock); + sock->result = result; SIGNAL(&sock->cond); + if (!atomic_load(&sock->active)) { + WAIT(&sock->scond, &sock->lock); + } + INSIST(atomic_load(&sock->active)); UNLOCK(&sock->lock); + /* + * The callback has to be called after the socket has been + * initialized + */ + if (result == ISC_R_SUCCESS) { + isc__nm_connectcb(sock, req, ISC_R_SUCCESS); + } + /* * The sock is now attached to the handle. */ @@ -727,7 +744,7 @@ isc_nm_udpconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, isc_nm_cb_t cb, void *cbarg, unsigned int timeout, size_t extrahandlesize) { isc_result_t result = ISC_R_SUCCESS; - isc_nmsocket_t *sock = NULL, *tmp = NULL; + isc_nmsocket_t *sock = NULL; isc__netievent_udpconnect_t *event = NULL; isc__nm_uvreq_t *req = NULL; sa_family_t sa_family; @@ -740,8 +757,8 @@ isc_nm_udpconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, sa_family = peer->addr.type.sa.sa_family; /* - * The socket() call can fail spuriously on FreeBSD 12, so we need to - * handle the failure early and gracefully. + * The socket() call can fail spuriously on FreeBSD 12, so we + * need to handle the failure early and gracefully. 
*/ result = isc__nm_socket(sa_family, SOCK_DGRAM, 0, &fd); if (result != ISC_R_SUCCESS) { @@ -751,13 +768,14 @@ isc_nm_udpconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, sock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t)); isc__nmsocket_init(sock, mgr, isc_nm_udpsocket, local); - INSIST(sock->connect_cb == NULL && sock->connect_cbarg == NULL); + atomic_init(&sock->active, false); sock->connect_cb = cb; sock->connect_cbarg = cbarg; sock->read_timeout = timeout; sock->extrahandlesize = extrahandlesize; sock->peer = peer->addr; sock->fd = fd; + sock->result = ISC_R_DEFAULT; atomic_init(&sock->client, true); result = isc__nm_socket_reuse(sock->fd); @@ -778,37 +796,29 @@ isc_nm_udpconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer, req->peer = peer->addr; req->local = local->addr; - event = isc__nm_get_ievent(mgr, netievent_udpconnect); - event->sock = sock; - event->req = req; - - /* - * Hold an additional sock reference so async callbacks - * can't destroy it until we're ready. 
- */ - isc__nmsocket_attach(sock, &tmp); + event = isc__nm_get_netievent_udpconnect(mgr, sock, req); if (isc__nm_in_netthread()) { + atomic_store(&sock->active, true); sock->tid = isc_nm_tid(); isc__nm_async_udpconnect(&mgr->workers[sock->tid], (isc__netievent_t *)event); - isc__nm_put_ievent(mgr, event); + isc__nm_put_netievent_udpconnect(mgr, event); } else { sock->tid = isc_random_uniform(mgr->nworkers); isc__nm_enqueue_ievent(&mgr->workers[sock->tid], (isc__netievent_t *)event); - - LOCK(&sock->lock); - while (!atomic_load(&sock->connected) && - !atomic_load(&sock->connect_error)) { - WAIT(&sock->cond, &sock->lock); - } - UNLOCK(&sock->lock); } - - result = atomic_load(&sock->result); - - isc__nmsocket_detach(&tmp); + LOCK(&sock->lock); + result = sock->result; + while (sock->result == ISC_R_DEFAULT) { + WAIT(&sock->cond, &sock->lock); + result = sock->result; + } + atomic_store(&sock->active, true); + BROADCAST(&sock->scond); + UNLOCK(&sock->lock); + ENSURE(result != ISC_R_DEFAULT); return (result); } @@ -817,31 +827,59 @@ static void udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, const struct sockaddr *addr, unsigned flags) { isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle); + REQUIRE(VALID_NMSOCK(sock)); udp_recv_cb(handle, nrecv, buf, addr, flags); - uv_udp_recv_stop(&sock->uv_handle.udp); + /* + * If a caller calls isc_nm_read() on a listening socket, we can + * get here, but we MUST NOT stop reading from the listener + * socket. The only difference between listener and connected + * sockets is that the former has sock->parent set and later + * does not. 
+ */ + if (!sock->parent) { + stop_reading(sock); + } } static void failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) { REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(sock->statichandle != NULL); + REQUIRE(result != ISC_R_SUCCESS); - if (sock->timer_initialized) { - uv_timer_stop(&sock->timer); - sock->timer_running = false; + if (atomic_load(&sock->client)) { + stop_reading(sock); + + if (!sock->recv_read) { + goto destroy; + } + sock->recv_read = false; + + if (sock->recv_cb != NULL) { + isc__nm_uvreq_t *req = get_read_req(sock, NULL); + isc__nmsocket_clearcb(sock); + isc__nm_readcb(sock, req, result); + } + + destroy: + isc__nmsocket_prep_destroy(sock); + return; } - uv_udp_recv_stop(&sock->uv_handle.udp); + /* + * For UDP server socket, we don't have child socket via + * "accept", so we: + * - we continue to read + * - we don't clear the callbacks + * - we don't destroy it (only stoplistening could do that) + */ + if (!sock->recv_read) { + return; + } + sock->recv_read = false; if (sock->recv_cb != NULL) { - isc__nm_uvreq_t *req = isc__nm_uvreq_get(sock->mgr, sock); - isc_nmhandle_attach(sock->statichandle, &req->handle); - req->cb.recv = sock->recv_cb; - req->cbarg = sock->recv_cbarg; - - isc__nmsocket_clearcb(sock); - + isc__nm_uvreq_t *req = get_read_req(sock, NULL); isc__nm_readcb(sock, req, result); } } @@ -859,12 +897,30 @@ failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, } } +static isc__nm_uvreq_t * +get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr) { + isc__nm_uvreq_t *req = NULL; + + req = isc__nm_uvreq_get(sock->mgr, sock); + req->cb.recv = sock->recv_cb; + req->cbarg = sock->recv_cbarg; + + if (atomic_load(&sock->client)) { + isc_nmhandle_attach(sock->statichandle, &req->handle); + } else { + req->handle = isc__nmhandle_get(sock, sockaddr, NULL); + } + + return req; +} + static void readtimeout_cb(uv_timer_t *handle) { isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle); REQUIRE(VALID_NMSOCK(sock)); 
REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->reading); /* * Timeout; stop reading and process whatever we have. @@ -873,106 +929,216 @@ readtimeout_cb(uv_timer_t *handle) { } /* - * Asynchronous 'udpread' call handler: start or resume reading on a socket; - * pause reading and call the 'recv' callback after each datagram. + * Asynchronous 'udpread' call handler: start or resume reading on a + * socket; pause reading and call the 'recv' callback after each + * datagram. */ void isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_udpread_t *ievent = (isc__netievent_udpread_t *)ev0; isc_nmsocket_t *sock = ievent->sock; + UNUSED(worker); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + if (inactive(sock)) { + sock->reading = true; failed_read_cb(sock, ISC_R_CANCELED); return; } - REQUIRE(worker->id == isc_nm_tid()); - if (sock->read_timeout != 0) { - if (!sock->timer_initialized) { - uv_timer_init(&worker->loop, &sock->timer); - uv_handle_set_data((uv_handle_t *)&sock->timer, sock); - sock->timer_initialized = true; - } - uv_timer_start(&sock->timer, readtimeout_cb, sock->read_timeout, - 0); - sock->timer_running = true; + start_reading(sock); +} + +static void +start_sock_timer(isc_nmsocket_t *sock) { + if (sock->read_timeout > 0) { + int r = uv_timer_start(&sock->timer, readtimeout_cb, + sock->read_timeout, 0); + REQUIRE(r == 0); + } +} + +static void +stop_sock_timer(isc_nmsocket_t *sock) { + int r = uv_timer_stop(&sock->timer); + REQUIRE(r == 0); +} + +static void +start_reading(isc_nmsocket_t *sock) { + if (sock->reading) { + return; } - uv_udp_recv_start(&sock->uv_handle.udp, udp_alloc_cb, udp_read_cb); + int r = uv_udp_recv_start(&sock->uv_handle.udp, udp_alloc_cb, + udp_read_cb); + REQUIRE(r == 0); + sock->reading = true; + + start_sock_timer(sock); +} + +static void +stop_reading(isc_nmsocket_t *sock) { + if (!sock->reading) { + return; + } + + int r = uv_udp_recv_stop(&sock->uv_handle.udp); + 
REQUIRE(r == 0); + sock->reading = false; + + stop_sock_timer(sock); } void isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { - isc_nmsocket_t *sock = handle->sock; - isc__netievent_startread_t *ievent = NULL; - REQUIRE(VALID_NMHANDLE(handle)); REQUIRE(VALID_NMSOCK(handle->sock)); - REQUIRE(handle->sock->type == isc_nm_udpsocket); - if (inactive(sock)) { - isc__nm_incstats(sock->mgr, sock->statsindex[STATID_RECVFAIL]); - cb(handle, ISC_R_CANCELED, NULL, cbarg); - return; - } + isc_nmsocket_t *sock = handle->sock; + isc__netievent_udpread_t *ievent = NULL; + REQUIRE(sock->type == isc_nm_udpsocket); + REQUIRE(sock->statichandle == handle); REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(!sock->recv_read); + sock->recv_cb = cb; sock->recv_cbarg = cbarg; + sock->recv_read = true; - ievent = isc__nm_get_ievent(sock->mgr, netievent_udpread); - ievent->sock = sock; + ievent = isc__nm_get_netievent_udpread(sock->mgr, sock); - if (sock->tid == isc_nm_tid()) { - isc__nm_async_udpread(&sock->mgr->workers[sock->tid], - (isc__netievent_t *)ievent); - isc__nm_put_ievent(sock->mgr, ievent); - } else { + if (sock->reading) { isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); + } else { + isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *)ievent); } } static void -udp_close_cb(uv_handle_t *uvhandle) { - isc_nmsocket_t *sock = uv_handle_get_data(uvhandle); +udp_stop_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = uv_handle_get_data(handle); + uv_handle_set_data(handle, NULL); REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->closing)); + + if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false }, + true)) { + INSIST(0); + ISC_UNREACHABLE(); + } isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]); - atomic_store(&sock->closed, true); + + atomic_store(&sock->listening, false); + + isc__nmsocket_detach(&sock); +} + +static void 
+udp_close_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = uv_handle_get_data(handle); + uv_handle_set_data(handle, NULL); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->closing)); + + if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false }, + true)) { + INSIST(0); + ISC_UNREACHABLE(); + } + + isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]); + + if (sock->server != NULL) { + isc__nmsocket_detach(&sock->server); + } + + atomic_store(&sock->connected, false); + atomic_store(&sock->listening, false); + isc__nmsocket_prep_destroy(sock); } static void -timer_close_cb(uv_handle_t *uvhandle) { - uv_handle_t *handle = uv_handle_get_data(uvhandle); +timer_close_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = uv_handle_get_data(handle); + uv_handle_set_data(handle, NULL); - uv_close(handle, udp_close_cb); + if (sock->parent) { + uv_close(&sock->uv_handle.handle, udp_stop_cb); + } else { + uv_close(&sock->uv_handle.handle, udp_close_cb); + } +} + +static void +stop_udp_child(isc_nmsocket_t *sock) { + REQUIRE(sock->type == isc_nm_udpsocket); + REQUIRE(sock->tid == isc_nm_tid()); + + if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false }, + true)) { + return; + } + + udp_close_direct(sock); + + LOCK(&sock->parent->lock); + sock->parent->rchildren -= 1; + UNLOCK(&sock->parent->lock); + BROADCAST(&sock->parent->cond); +} + +static void +stop_udp_parent(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_udplistener); + + for (int i = 0; i < sock->nchildren; i++) { + isc__netievent_udpstop_t *ievent = NULL; + isc_nmsocket_t *csock = &sock->children[i]; + REQUIRE(VALID_NMSOCK(csock)); + + atomic_store(&csock->active, false); + + if (csock->tid == isc_nm_tid()) { + stop_udp_child(csock); + continue; + } + + ievent = isc__nm_get_netievent_udpstop(sock->mgr, csock); + isc__nm_enqueue_ievent(&sock->mgr->workers[i], + (isc__netievent_t *)ievent); + } + + 
LOCK(&sock->lock); + while (sock->rchildren > 0) { + WAIT(&sock->cond, &sock->lock); + } + atomic_store(&sock->closed, true); + UNLOCK(&sock->lock); + + isc__nmsocket_prep_destroy(sock); } static void udp_close_direct(isc_nmsocket_t *sock) { - uv_udp_recv_stop(&sock->uv_handle.udp); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); - if (sock->timer_running) { - uv_timer_stop(&sock->timer); - sock->timer_running = false; - } - - if (sock->timer_initialized) { - sock->timer_initialized = false; - /* - * The read and timer is stopped and the socket will be - * scheduled to be closed, so we can override the data that the - * timer handle holds. - */ - uv_handle_set_data((uv_handle_t *)&sock->timer, - &sock->uv_handle.handle); - uv_close((uv_handle_t *)&sock->timer, timer_close_cb); - } else { - uv_close(&sock->uv_handle.handle, udp_close_cb); - } + uv_close((uv_handle_t *)&sock->timer, timer_close_cb); } void @@ -980,7 +1146,9 @@ isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_udpclose_t *ievent = (isc__netievent_udpclose_t *)ev0; isc_nmsocket_t *sock = ievent->sock; - REQUIRE(worker->id == ievent->sock->tid); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); udp_close_direct(sock); } @@ -1000,8 +1168,7 @@ isc__nm_udp_close(isc_nmsocket_t *sock) { udp_close_direct(sock); } else { isc__netievent_udpclose_t *ievent = - isc__nm_get_ievent(sock->mgr, netievent_udpclose); - ievent->sock = sock; + isc__nm_get_netievent_udpclose(sock->mgr, sock); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -1011,14 +1178,7 @@ void isc__nm_udp_shutdown(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); - - if (sock->type != isc_nm_udpsocket) { - return; - } - - if (atomic_load(&sock->connecting)) { - return; - } + REQUIRE(sock->type == isc_nm_udpsocket); /* * If the socket is active, mark it inactive and @@ -1028,8 
+1188,29 @@ isc__nm_udp_shutdown(isc_nmsocket_t *sock) { return; } - if (sock->statichandle != NULL) { + /* + * If the socket is connecting, the cancel will happen in the + * async_udpconnect() due to the socket being inactive now. + */ + if (atomic_load(&sock->connecting)) { + return; + } + + /* + * When the client detaches the last handle, the + * sock->statichandle will be NULL; in that case, nobody is + * interested in the callback. + */ + if (sock->statichandle) { failed_read_cb(sock, ISC_R_CANCELED); + return; + } + + /* + * Otherwise, we just send the socket to the abyss... + */ + if (sock->parent == NULL) { + isc__nmsocket_prep_destroy(sock); } } @@ -1045,9 +1226,7 @@ isc__nm_udp_cancelread(isc_nmhandle_t *handle) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->type == isc_nm_udpsocket); - ievent = isc__nm_get_ievent(sock->mgr, netievent_udpcancel); - ievent->sock = sock; - isc_nmhandle_attach(handle, &ievent->handle); + ievent = isc__nm_get_netievent_udpcancel(sock->mgr, sock, handle); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *)ievent); @@ -1056,31 +1235,29 @@ isc__nm_udp_cancelread(isc_nmhandle_t *handle) { void isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_udpcancel_t *ievent = (isc__netievent_udpcancel_t *)ev0; - isc_nmsocket_t *sock = ievent->sock; - isc_nmhandle_t *handle = ievent->handle; + isc_nmsocket_t *sock; - REQUIRE(worker->id == ievent->sock->tid); + UNUSED(worker); - uv_udp_recv_stop(&sock->uv_handle.udp); + REQUIRE(VALID_NMSOCK(ievent->sock)); - if (atomic_load(&sock->client)) { - failed_read_cb(sock, ISC_R_EOF); - } + sock = ievent->sock; - isc_nmhandle_detach(&handle); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(atomic_load(&sock->client)); + + failed_read_cb(sock, ISC_R_EOF); } void isc__nm_udp_settimeout(isc_nmhandle_t *handle, uint32_t timeout) { - isc_nmsocket_t *sock = NULL; - REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); - sock = 
handle->sock; + isc_nmsocket_t *sock = handle->sock; sock->read_timeout = timeout; - if (sock->timer_running) { - uv_timer_start(&sock->timer, readtimeout_cb, sock->read_timeout, - 0); + if (uv_is_active((uv_handle_t *)&sock->timer)) { + start_sock_timer(sock); } } diff --git a/lib/isc/netmgr/uv-compat.c b/lib/isc/netmgr/uv-compat.c index 51964477e6..1d60ae6312 100644 --- a/lib/isc/netmgr/uv-compat.c +++ b/lib/isc/netmgr/uv-compat.c @@ -14,179 +14,7 @@ #include -#ifndef HAVE_UV_IMPORT -/* - * XXXWPK: This code goes into libuv internals and it's platform dependent. - * It's ugly, we shouldn't do it, but the alternative with passing sockets - * over IPC sockets is even worse, and causes all kind of different - * problems. We should try to push these things upstream. - */ - -#ifdef WIN32 -/* This code is adapted from libuv/src/win/internal.h */ - -typedef enum { - UV__IPC_SOCKET_XFER_NONE = 0, - UV__IPC_SOCKET_XFER_TCP_CONNECTION, - UV__IPC_SOCKET_XFER_TCP_SERVER -} uv__ipc_socket_xfer_type_t; - -typedef struct { - WSAPROTOCOL_INFOW socket_info; - uint32_t delayed_error; -} uv__ipc_socket_xfer_info_t; - -/* - * Needed to make sure that the internal structure that we pulled out of - * libuv hasn't changed. - */ - -int -uv__tcp_xfer_import(uv_tcp_t *tcp, uv__ipc_socket_xfer_type_t xfer_type, - uv__ipc_socket_xfer_info_t *xfer_info); - -int -uv__tcp_xfer_export(uv_tcp_t *handle, int target_pid, - uv__ipc_socket_xfer_type_t *xfer_type, - uv__ipc_socket_xfer_info_t *xfer_info); - -int -isc_uv_export(uv_stream_t *stream, isc_uv_stream_info_t *info) { - uv__ipc_socket_xfer_info_t xfer_info; - uv__ipc_socket_xfer_type_t xfer_type = UV__IPC_SOCKET_XFER_NONE; - - /* - * Needed to make sure that the internal structure that we pulled - * out of libuv hasn't changed. 
- */ - RUNTIME_CHECK(sizeof(uv__ipc_socket_xfer_info_t) == 632); - - if (stream->type != UV_TCP) { - return (-1); - } - int r = uv__tcp_xfer_export((uv_tcp_t *)stream, GetCurrentProcessId(), - &xfer_type, &xfer_info); - if (r != 0) { - return (r); - } - if (xfer_info.delayed_error != 0) { - return (xfer_info.delayed_error); - } - INSIST(xfer_type == UV__IPC_SOCKET_XFER_TCP_CONNECTION); - info->type = UV_TCP; - info->socket_info = xfer_info.socket_info; - return (0); -} - -int -isc_uv_import(uv_stream_t *stream, isc_uv_stream_info_t *info) { - if (stream->type != UV_TCP || info->type != UV_TCP) { - return (-1); - } - - return (uv__tcp_xfer_import( - (uv_tcp_t *)stream, UV__IPC_SOCKET_XFER_TCP_CONNECTION, - &(uv__ipc_socket_xfer_info_t){ - .socket_info = info->socket_info })); -} -#else /* WIN32 */ -/* Adapted from libuv/src/unix/internal.h */ -#include -#include - -static int -isc_uv__cloexec(int fd, int set) { - int r; - - /* - * This #ifdef is taken directly from the libuv sources. - * We use FIOCLEX and FIONCLEX ioctl() calls when possible, - * but on some platforms are not implemented, or defined but - * not implemented correctly. On those, we use the FD_CLOEXEC - * fcntl() call, which adds extra system call overhead, but - * works. - */ -#if defined(_AIX) || defined(__APPLE__) || defined(__DragonFly__) || \ - defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ - defined(__linux__) || defined(__OpenBSD__) || defined(__NetBSD__) - do { - r = ioctl(fd, set ? 
FIOCLEX : FIONCLEX); - } while (r == -1 && errno == EINTR); -#else /* FIOCLEX/FIONCLEX unsupported */ - int flags; - - do { - r = fcntl(fd, F_GETFD); - } while (r == -1 && errno == EINTR); - - if (r == -1) { - return (-1); - } - - if (!!(r & FD_CLOEXEC) == !!set) { - return (0); - } - - if (set) { - flags = r | FD_CLOEXEC; - } else { - flags = r & ~FD_CLOEXEC; - } - - do { - r = fcntl(fd, F_SETFD, flags); - } while (r == -1 && errno == EINTR); -#endif /* FIOCLEX/FIONCLEX unsupported */ - - if (r != 0) { - return (-1); - } - - return (0); -} - -int -isc_uv_export(uv_stream_t *stream, isc_uv_stream_info_t *info) { - int oldfd, fd; - int err; - - if (stream->type != UV_TCP) { - return (-1); - } - err = uv_fileno((uv_handle_t *)stream, (uv_os_fd_t *)&oldfd); - - if (err != 0) { - return (err); - } - - fd = dup(oldfd); - if (fd == -1) { - return (-1); - } - - err = isc_uv__cloexec(fd, 1); - if (err != 0) { - close(fd); - return (err); - } - - info->type = stream->type; - info->fd = fd; - return (0); -} - -int -isc_uv_import(uv_stream_t *stream, isc_uv_stream_info_t *info) { - if (info->type != UV_TCP) { - return (-1); - } - - uv_tcp_t *tcp = (uv_tcp_t *)stream; - return (uv_tcp_open(tcp, info->fd)); -} -#endif /* ifdef WIN32 */ - -#endif /* ifndef HAVE_UV_IMPORT */ +#include "netmgr-int.h" #ifndef HAVE_UV_UDP_CONNECT int @@ -219,3 +47,82 @@ isc_uv_udp_connect(uv_udp_t *handle, const struct sockaddr *addr) { return (0); } #endif /* ifndef HAVE_UV_UDP_CONNECT */ + +int +isc_uv_udp_freebind(uv_udp_t *handle, const struct sockaddr *addr, + unsigned int flags) { + int r; + int fd; + + r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd); + if (r < 0) { + return (r); + } + + r = uv_udp_bind(handle, addr, flags); + if (r == UV_EADDRNOTAVAIL && + isc__nm_socket_freebind(fd, addr->sa_family) == ISC_R_SUCCESS) + { + /* + * Retry binding with IP_FREEBIND (or equivalent option) if the + * address is not available. 
This helps with IPv6 tentative + * addresses which are reported by the route socket, although + * named is not yet able to properly bind to them. + */ + r = uv_udp_bind(handle, addr, flags); + } + + return (r); +} + +static int +isc__uv_tcp_bind_now(uv_tcp_t *handle, const struct sockaddr *addr, + unsigned int flags) { + int r; + struct sockaddr_storage sname; + int snamelen = sizeof(sname); + + r = uv_tcp_bind(handle, addr, flags); + if (r < 0) { + return (r); + } + + /* + * uv_tcp_bind() uses a delayed error, initially returning + * success even if bind() fails. By calling uv_tcp_getsockname() + * here we can find out whether the bind() call was successful. + */ + r = uv_tcp_getsockname(handle, (struct sockaddr *)&sname, &snamelen); + if (r < 0) { + return (r); + } + + return (0); +} + +int +isc_uv_tcp_freebind(uv_tcp_t *handle, const struct sockaddr *addr, + unsigned int flags) { + int r; + int fd; + + r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd); + if (r < 0) { + return (r); + } + + r = isc__uv_tcp_bind_now(handle, addr, flags); + if (r == UV_EADDRNOTAVAIL && + isc__nm_socket_freebind(fd, addr->sa_family) == ISC_R_SUCCESS) + { + /* + * Retry binding with IP_FREEBIND (or equivalent option) if the + * address is not available. This helps with IPv6 tentative + * addresses which are reported by the route socket, although + * named is not yet able to properly bind to them. 
+ */ + r = isc__uv_tcp_bind_now(handle, addr, flags); + } + + return (r); +} diff --git a/lib/isc/netmgr/uv-compat.h b/lib/isc/netmgr/uv-compat.h index 334924588c..960e0aed42 100644 --- a/lib/isc/netmgr/uv-compat.h +++ b/lib/isc/netmgr/uv-compat.h @@ -33,53 +33,6 @@ uv_handle_set_data(uv_handle_t *handle, void *data) { } #endif /* ifndef HAVE_UV_HANDLE_SET_DATA */ -#ifdef HAVE_UV_IMPORT - -#define isc_uv_stream_info_t uv_stream_info_t -#define isc_uv_export uv_export -#define isc_uv_import uv_import - -#else - -/* - * These functions are not available in libuv, but they're very internal - * to libuv. We should try to get them merged upstream. - */ - -/* - * A sane way to pass listening TCP socket to child threads, without using - * IPC (as the libuv example shows) but a version of the uv_export() and - * uv_import() functions that were unfortunately removed from libuv. - * This is based on the original libuv code. - */ - -typedef struct isc_uv_stream_info_s isc_uv_stream_info_t; - -struct isc_uv_stream_info_s { - uv_handle_type type; -#ifdef WIN32 - WSAPROTOCOL_INFOW socket_info; -#else /* ifdef WIN32 */ - int fd; -#endif /* ifdef WIN32 */ -}; - -int -isc_uv_export(uv_stream_t *stream, isc_uv_stream_info_t *info); -/*%< - * Exports uv_stream_t as isc_uv_stream_info_t value, which could - * be used to initialize shared streams within the same process. - */ - -int -isc_uv_import(uv_stream_t *stream, isc_uv_stream_info_t *info); -/*%< - * Imports uv_stream_info_t value into uv_stream_t to initialize a - * shared stream. 
- */ - -#endif - #ifdef HAVE_UV_UDP_CONNECT #define isc_uv_udp_connect uv_udp_connect #else @@ -95,3 +48,11 @@ isc_uv_udp_connect(uv_udp_t *handle, const struct sockaddr *addr); */ #endif + +int +isc_uv_udp_freebind(uv_udp_t *handle, const struct sockaddr *addr, + unsigned int flags); + +int +isc_uv_tcp_freebind(uv_tcp_t *handle, const struct sockaddr *addr, + unsigned int flags); diff --git a/lib/isc/netmgr/uverr2result.c b/lib/isc/netmgr/uverr2result.c index eb9dbea9b6..3cd34a5e1f 100644 --- a/lib/isc/netmgr/uverr2result.c +++ b/lib/isc/netmgr/uverr2result.c @@ -27,7 +27,7 @@ */ isc_result_t isc___nm_uverr2result(int uverr, bool dolog, const char *file, - unsigned int line) { + unsigned int line, const char *func) { switch (uverr) { case UV_ENOTDIR: case UV_ELOOP: @@ -81,12 +81,15 @@ isc___nm_uverr2result(int uverr, bool dolog, const char *file, return (ISC_R_CONNREFUSED); case UV_ECANCELED: return (ISC_R_CANCELED); + case UV_EOF: + return (ISC_R_EOF); default: if (dolog) { - UNEXPECTED_ERROR(file, line, - "unable to convert libuv " - "error code to isc_result: %d: %s", - uverr, uv_strerror(uverr)); + UNEXPECTED_ERROR( + file, line, + "unable to convert libuv " + "error code in %s to isc_result: %d: %s", + func, uverr, uv_strerror(uverr)); } return (ISC_R_UNEXPECTED); } diff --git a/lib/isc/quota.c b/lib/isc/quota.c index b0f14ac7d2..709f174042 100644 --- a/lib/isc/quota.c +++ b/lib/isc/quota.c @@ -17,6 +17,12 @@ #include #include +#define QUOTA_MAGIC ISC_MAGIC('Q', 'U', 'O', 'T') +#define VALID_QUOTA(p) ISC_MAGIC_VALID(p, QUOTA_MAGIC) + +#define QUOTA_CB_MAGIC ISC_MAGIC('Q', 'T', 'C', 'B') +#define VALID_QUOTA_CB(p) ISC_MAGIC_VALID(p, QUOTA_CB_MAGIC) + void isc_quota_init(isc_quota_t *quota, unsigned int max) { atomic_init("a->max, max); @@ -25,10 +31,14 @@ isc_quota_init(isc_quota_t *quota, unsigned int max) { atomic_init("a->waiting, 0); ISC_LIST_INIT(quota->cbs); isc_mutex_init("a->cblock); + quota->magic = QUOTA_MAGIC; } void isc_quota_destroy(isc_quota_t 
*quota) { + REQUIRE(VALID_QUOTA(quota)); + quota->magic = 0; + INSIST(atomic_load("a->used) == 0); INSIST(atomic_load("a->waiting) == 0); INSIST(ISC_LIST_EMPTY(quota->cbs)); @@ -40,26 +50,31 @@ isc_quota_destroy(isc_quota_t *quota) { void isc_quota_soft(isc_quota_t *quota, unsigned int soft) { + REQUIRE(VALID_QUOTA(quota)); atomic_store_release("a->soft, soft); } void isc_quota_max(isc_quota_t *quota, unsigned int max) { + REQUIRE(VALID_QUOTA(quota)); atomic_store_release("a->max, max); } unsigned int isc_quota_getmax(isc_quota_t *quota) { + REQUIRE(VALID_QUOTA(quota)); return (atomic_load_relaxed("a->max)); } unsigned int isc_quota_getsoft(isc_quota_t *quota) { + REQUIRE(VALID_QUOTA(quota)); return (atomic_load_relaxed("a->soft)); } unsigned int isc_quota_getused(isc_quota_t *quota) { + REQUIRE(VALID_QUOTA(quota)); return (atomic_load_relaxed("a->used)); } @@ -140,13 +155,21 @@ doattach(isc_quota_t *quota, isc_quota_t **p) { } isc_result_t -isc_quota_attach(isc_quota_t *quota, isc_quota_t **p) { - return (isc_quota_attach_cb(quota, p, NULL)); +isc_quota_attach(isc_quota_t *quota, isc_quota_t **quotap) { + REQUIRE(VALID_QUOTA(quota)); + REQUIRE(quotap != NULL && *quotap == NULL); + + return (isc_quota_attach_cb(quota, quotap, NULL)); } isc_result_t -isc_quota_attach_cb(isc_quota_t *quota, isc_quota_t **p, isc_quota_cb_t *cb) { - isc_result_t result = doattach(quota, p); +isc_quota_attach_cb(isc_quota_t *quota, isc_quota_t **quotap, + isc_quota_cb_t *cb) { + REQUIRE(VALID_QUOTA(quota)); + REQUIRE(cb == NULL || VALID_QUOTA_CB(cb)); + REQUIRE(quotap != NULL && *quotap == NULL); + + isc_result_t result = doattach(quota, quotap); if (result == ISC_R_QUOTA && cb != NULL) { LOCK("a->cblock); enqueue(quota, cb); @@ -160,11 +183,14 @@ isc_quota_cb_init(isc_quota_cb_t *cb, isc_quota_cb_func_t cb_func, void *data) { ISC_LINK_INIT(cb, link); cb->cb_func = cb_func; cb->data = data; + cb->magic = QUOTA_CB_MAGIC; } void -isc_quota_detach(isc_quota_t **p) { - INSIST(p != NULL && 
*p != NULL); - quota_release(*p); - *p = NULL; +isc_quota_detach(isc_quota_t **quotap) { + REQUIRE(quotap != NULL && VALID_QUOTA(*quotap)); + isc_quota_t *quota = *quotap; + *quotap = NULL; + + quota_release(quota); } diff --git a/lib/isc/tests/Makefile.am b/lib/isc/tests/Makefile.am index 87d59b44e7..50b164da9e 100644 --- a/lib/isc/tests/Makefile.am +++ b/lib/isc/tests/Makefile.am @@ -29,7 +29,6 @@ check_PROGRAMS = \ md_test \ mem_test \ netaddr_test \ - netmgr_test \ parse_test \ pool_test \ quota_test \ @@ -44,8 +43,12 @@ check_PROGRAMS = \ symtab_test \ task_test \ taskpool_test \ + tcp_test \ + tcp_quota_test \ + tcpdns_test \ time_test \ - timer_test + timer_test \ + udp_test TESTS = $(check_PROGRAMS) @@ -69,11 +72,39 @@ random_test_LDADD = \ $(LDADD) \ -lm -netmgr_test_CPPFLAGS = \ +tcp_test_CPPFLAGS = \ $(AM_CPPFLAGS) \ + $(OPENSSL_CFLAGS) \ $(LIBUV_CFLAGS) -netmgr_test_LDADD = \ +tcp_test_LDADD = \ + $(LDADD) \ + $(LIBUV_LIBS) + +tcp_quota_test_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + $(OPENSSL_CFLAGS) \ + $(LIBUV_CFLAGS) + +tcp_quota_test_LDADD = \ + $(LDADD) \ + $(LIBUV_LIBS) + +tcpdns_test_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + $(OPENSSL_CFLAGS) \ + $(LIBUV_CFLAGS) + +tcpdns_test_LDADD = \ + $(LDADD) \ + $(LIBUV_LIBS) + +udp_test_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + $(OPENSSL_CFLAGS) \ + $(LIBUV_CFLAGS) + +udp_test_LDADD = \ $(LDADD) \ $(LIBUV_LIBS) diff --git a/lib/isc/tests/tcp_quota_test.c b/lib/isc/tests/tcp_quota_test.c new file mode 100644 index 0000000000..849213b677 --- /dev/null +++ b/lib/isc/tests/tcp_quota_test.c @@ -0,0 +1,737 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. 
+ */ + +#if HAVE_CMOCKA +#include /* IWYU pragma: keep */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define UNIT_TESTING +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../netmgr/netmgr-int.h" +#include "isctest.h" + +#define MAX_NM 2 + +static isc_sockaddr_t tcp_listen_addr; + +static uint64_t send_magic = 0; +static uint64_t stop_magic = 0; + +static uv_buf_t send_msg = { .base = (char *)&send_magic, + .len = sizeof(send_magic) }; +static uv_buf_t stop_msg = { .base = (char *)&stop_magic, + .len = sizeof(stop_magic) }; + +static atomic_uint_fast64_t nsends; + +static atomic_uint_fast64_t ssends; +static atomic_uint_fast64_t sreads; + +static atomic_uint_fast64_t saccepts; + +static atomic_uint_fast64_t cconnects; +static atomic_uint_fast64_t csends; +static atomic_uint_fast64_t creads; +static atomic_uint_fast64_t ctimeouts; + +static unsigned int workers = 2; + +static isc_quota_t listener_quota; +static atomic_bool check_listener_quota; + +#define NSENDS 100 +#define NWRITES 10 + +/* Enable this to print values while running tests */ +#undef PRINT_DEBUG +#ifdef PRINT_DEBUG +#define X(v) fprintf(stderr, #v " = %" PRIu64 "\n", atomic_load(&v)) +#define P(v) fprintf(stderr, #v " = %" PRIu64 "\n", v) +#else +#define X(v) +#define P(v) +#endif + +static int +setup_ephemeral_port(isc_sockaddr_t *addr, sa_family_t family) { + isc_result_t result; + socklen_t addrlen = sizeof(*addr); + int fd; + int r; + + isc_sockaddr_fromin6(addr, &in6addr_loopback, 0); + + fd = socket(AF_INET6, family, 0); + if (fd < 0) { + perror("setup_ephemeral_port: socket()"); + return (-1); + } + + r = bind(fd, (const struct sockaddr *)&addr->type.sa, + sizeof(addr->type.sin6)); + if (r != 0) { + perror("setup_ephemeral_port: bind()"); + close(fd); + return (r); + } + + r = getsockname(fd, (struct sockaddr *)&addr->type.sa, &addrlen); + if (r != 0) { + 
perror("setup_ephemeral_port: getsockname()"); + close(fd); + return (r); + } + + result = isc__nm_socket_reuse(fd); + if (result != ISC_R_SUCCESS && result != ISC_R_NOTIMPLEMENTED) { + fprintf(stderr, + "setup_ephemeral_port: isc__nm_socket_reuse(): %s", + isc_result_totext(result)); + close(fd); + return (-1); + } + + result = isc__nm_socket_reuse_lb(fd); + if (result != ISC_R_SUCCESS && result != ISC_R_NOTIMPLEMENTED) { + fprintf(stderr, + "setup_ephemeral_port: isc__nm_socket_reuse_lb(): %s", + isc_result_totext(result)); + close(fd); + return (-1); + } + +#if IPV6_RECVERR +#define setsockopt_on(socket, level, name) \ + setsockopt(socket, level, name, &(int){ 1 }, sizeof(int)) + + r = setsockopt_on(fd, IPPROTO_IPV6, IPV6_RECVERR); + if (r != 0) { + perror("setup_ephemeral_port"); + close(fd); + return (r); + } +#endif + + return (fd); +} + +static int +_setup(void **state) { + UNUSED(state); + + /* workers = isc_os_ncpus(); */ + + if (isc_test_begin(NULL, true, workers) != ISC_R_SUCCESS) { + return (-1); + } + + signal(SIGPIPE, SIG_IGN); + + return (0); +} + +static int +_teardown(void **state) { + UNUSED(state); + + isc_test_end(); + + return (0); +} + +/* Generic */ + +thread_local uint8_t tcp_buffer_storage[4096]; +thread_local size_t tcp_buffer_length = 0; + +static int +nm_setup(void **state) { + size_t nworkers = ISC_MAX(ISC_MIN(workers, 32), 1); + int tcp_listen_sock = -1; + isc_nm_t **nm = NULL; + + tcp_listen_addr = (isc_sockaddr_t){ .length = 0 }; + tcp_listen_sock = setup_ephemeral_port(&tcp_listen_addr, SOCK_STREAM); + if (tcp_listen_sock < 0) { + return (-1); + } + close(tcp_listen_sock); + tcp_listen_sock = -1; + + atomic_store(&nsends, NSENDS * NWRITES); + + atomic_store(&csends, 0); + atomic_store(&creads, 0); + atomic_store(&sreads, 0); + atomic_store(&ssends, 0); + atomic_store(&saccepts, 0); + atomic_store(&ctimeouts, 0); + atomic_store(&cconnects, 0); + + isc_nonce_buf(&send_magic, sizeof(send_magic)); + isc_nonce_buf(&stop_magic, 
sizeof(stop_magic)); + if (send_magic == stop_magic) { + return (-1); + } + + nm = isc_mem_get(test_mctx, MAX_NM * sizeof(nm[0])); + for (size_t i = 0; i < MAX_NM; i++) { + nm[i] = isc_nm_start(test_mctx, nworkers); + assert_non_null(nm[i]); + } + + *state = nm; + + isc_quota_init(&listener_quota, 0); + atomic_store(&check_listener_quota, false); + return (0); +} + +static int +nm_teardown(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + + for (size_t i = 0; i < MAX_NM; i++) { + isc_nm_destroy(&nm[i]); + assert_null(nm[i]); + } + isc_mem_put(test_mctx, nm, MAX_NM * sizeof(nm[0])); + + isc_quota_destroy(&listener_quota); + return (0); +} + +thread_local size_t nwrites = NWRITES; + +/* TCP Connect */ + +static void +tcp_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg); + +static void +tcp_connect_send(isc_nmhandle_t *handle); + +static void +tcp_connect_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg) { + uint64_t magic = 0; + + UNUSED(cbarg); + + assert_non_null(handle); + if (eresult != ISC_R_SUCCESS) { + goto unref; + } + + memmove(tcp_buffer_storage + tcp_buffer_length, region->base, + region->length); + tcp_buffer_length += region->length; + + if (tcp_buffer_length >= sizeof(magic)) { + isc_nm_pauseread(handle); + + atomic_fetch_add(&creads, 1); + + magic = *(uint64_t *)tcp_buffer_storage; + assert_true(magic == stop_magic || magic == send_magic); + + tcp_buffer_length -= sizeof(magic); + memmove(tcp_buffer_storage, tcp_buffer_storage + sizeof(magic), + tcp_buffer_length); + + if (magic == send_magic) { + tcp_connect_send(handle); + return; + } else if (magic == stop_magic) { + /* We are done, so we don't send anything back */ + /* There should be no more packets in the buffer */ + assert_int_equal(tcp_buffer_length, 0); + } + } +unref: + isc_nmhandle_detach(&handle); +} + +static void +tcp_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { + 
assert_non_null(handle); + UNUSED(cbarg); + + if (eresult == ISC_R_SUCCESS) { + atomic_fetch_add(&csends, 1); + isc_nm_resumeread(handle); + } else { + /* Send failed, we need to stop reading too */ + isc_nm_cancelread(handle); + } +} + +static void +tcp_connect_shutdown(isc_nmhandle_t *handle, isc_result_t eresult, + void *cbarg) { + UNUSED(cbarg); + + assert_non_null(handle); + + if (eresult == ISC_R_SUCCESS) { + atomic_fetch_add(&csends, 1); + } else { + isc_nm_cancelread(handle); + } +} + +static void +tcp_connect_send(isc_nmhandle_t *handle) { + uint_fast64_t sends = atomic_load(&nsends); + + while (sends > 0) { + /* Continue until we subtract or we are done */ + if (atomic_compare_exchange_weak(&nsends, &sends, sends - 1)) { + sends--; + break; + } + } + + if (sends == 0) { + isc_nm_send(handle, (isc_region_t *)&stop_msg, + tcp_connect_shutdown, NULL); + } else { + isc_nm_send(handle, (isc_region_t *)&send_msg, + tcp_connect_send_cb, NULL); + } +} + +static void +tcp_connect_connect_cb(isc_nmhandle_t *handle, isc_result_t eresult, + void *cbarg) { + isc_nmhandle_t *readhandle = NULL; + + UNUSED(cbarg); + + if (eresult != ISC_R_SUCCESS) { + uint_fast64_t sends = atomic_load(&nsends); + + /* We failed to connect; try again */ + while (sends > 0) { + /* Continue until we subtract or we are done */ + if (atomic_compare_exchange_weak(&nsends, &sends, + sends - 1)) { + sends--; + break; + } + } + return; + } + + atomic_fetch_add(&cconnects, 1); + + isc_nmhandle_attach(handle, &readhandle); + isc_nm_read(handle, tcp_connect_read_cb, NULL); + + tcp_connect_send(handle); +} + +static isc_result_t +tcp_listen_accept_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg); + +static isc_threadresult_t +tcp_connect_thread(isc_threadarg_t arg) { + isc_nm_t *connect_nm = (isc_nm_t *)arg; + isc_sockaddr_t tcp_connect_addr; + + tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); + + while 
(atomic_load(&nsends) > 0) { + (void)isc_nm_tcpconnect(connect_nm, + (isc_nmiface_t *)&tcp_connect_addr, + (isc_nmiface_t *)&tcp_listen_addr, + tcp_connect_connect_cb, NULL, 1000, 0); + } + + return ((isc_threadresult_t)0); +} + +static isc_quota_t * +tcp_listener_init_quota(size_t nthreads) { + isc_quota_t *quotap = NULL; + if (atomic_load(&check_listener_quota)) { + unsigned max_quota = ISC_MAX(nthreads / 2, 1); + isc_quota_max(&listener_quota, max_quota); + quotap = &listener_quota; + } + return quotap; +} + +static void +tcp_recv_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_nmsocket_t *listen_sock = NULL; + isc_thread_t threads[32] = { 0 }; + isc_quota_t *quotap = tcp_listener_init_quota(nthreads); + + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + tcp_listen_accept_cb, NULL, 0, 0, quotap, + &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(tcp_connect_thread, connect_nm, &threads[i]); + } + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + isc_nm_closedown(connect_nm); + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + X(saccepts); + + /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ + assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); + /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ + assert_true(atomic_load(&creads) <= atomic_load(&csends)); + assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); +} + +static void +tcp_recv_half_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t 
result = ISC_R_SUCCESS; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_nmsocket_t *listen_sock = NULL; + isc_thread_t threads[32] = { 0 }; + isc_quota_t *quotap = tcp_listener_init_quota(nthreads); + + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + tcp_listen_accept_cb, NULL, 0, 0, quotap, + &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(tcp_connect_thread, connect_nm, &threads[i]); + } + + while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { + isc_thread_yield(); + } + + isc_nm_closedown(connect_nm); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + X(saccepts); + + /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ + assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); + /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ + assert_true(atomic_load(&creads) <= atomic_load(&csends)); + assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); +} + +static void +tcp_half_recv_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_nmsocket_t *listen_sock = NULL; + isc_thread_t threads[32] = { 0 }; + isc_quota_t *quotap = tcp_listener_init_quota(nthreads); + + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + tcp_listen_accept_cb, NULL, 0, 0, quotap, + &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(tcp_connect_thread, connect_nm, &threads[i]); + } + + while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { + isc_thread_yield(); + } 
+ + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + isc_nm_closedown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + X(saccepts); + + /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ + assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); + /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ + assert_true(atomic_load(&creads) <= atomic_load(&csends)); + assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); +} + +static void +tcp_half_recv_half_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_nmsocket_t *listen_sock = NULL; + isc_thread_t threads[32] = { 0 }; + isc_quota_t *quotap = tcp_listener_init_quota(nthreads); + + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + tcp_listen_accept_cb, NULL, 0, 0, quotap, + &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(tcp_connect_thread, connect_nm, &threads[i]); + } + + while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { + isc_thread_yield(); + } + + isc_nm_closedown(connect_nm); + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + X(saccepts); + + /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ + assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); + /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ + assert_true(atomic_load(&creads) <= atomic_load(&csends)); + 
assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); +} + +static void +tcp_recv_send_quota(void **state) { + atomic_store(&check_listener_quota, true); + tcp_recv_send(state); +} + +static void +tcp_recv_half_send_quota(void **state) { + atomic_store(&check_listener_quota, true); + tcp_recv_half_send(state); +} + +static void +tcp_half_recv_send_quota(void **state) { + atomic_store(&check_listener_quota, true); + tcp_half_recv_send(state); +} + +static void +tcp_half_recv_half_send_quota(void **state) { + atomic_store(&check_listener_quota, true); + tcp_half_recv_half_send(state); +} + +/* TCP Listener */ + +/* + * TODO: + * 1. write a timeout test + */ + +static void +tcp_listen_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg); + +static void +tcp_listen_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { + UNUSED(eresult); + UNUSED(cbarg); + + assert_non_null(handle); + + if (eresult == ISC_R_SUCCESS) { + atomic_fetch_add(&ssends, 1); + isc_nm_resumeread(handle); + } +} + +static void +tcp_listen_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg) { + uint64_t magic = 0; + + UNUSED(cbarg); + + assert_non_null(handle); + + if (eresult != ISC_R_SUCCESS) { + goto unref; + } + + atomic_fetch_add(&sreads, 1); + + memmove(tcp_buffer_storage + tcp_buffer_length, region->base, + region->length); + tcp_buffer_length += region->length; + + if (tcp_buffer_length >= sizeof(magic)) { + isc_nm_pauseread(handle); + + magic = *(uint64_t *)tcp_buffer_storage; + assert_true(magic == stop_magic || magic == send_magic); + + tcp_buffer_length -= sizeof(magic); + memmove(tcp_buffer_storage, tcp_buffer_storage + sizeof(magic), + tcp_buffer_length); + + if (magic == send_magic) { + isc_nm_send(handle, region, tcp_listen_send_cb, NULL); + return; + } else if (magic == stop_magic) { + /* We are done, so we don't send anything back */ + /* There should be no more packets in the 
buffer */ + assert_int_equal(tcp_buffer_length, 0); + if (atomic_load(&check_listener_quota)) { + int_fast32_t concurrent = + isc__nm_tcp_listener_nactive( + handle->sock->server->parent); + assert_true(concurrent >= 0); + assert_true((uint_fast32_t)concurrent <= + isc_quota_getmax(&listener_quota)); + P(concurrent); + } + } + } +unref: + isc_nmhandle_detach(&handle); +} + +static isc_result_t +tcp_listen_accept_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + isc_nmhandle_t *readhandle = NULL; + + UNUSED(cbarg); + + if (result != ISC_R_SUCCESS) { + return (result); + } + + tcp_buffer_length = 0; + + atomic_fetch_add(&saccepts, 1); + + if (atomic_load(&check_listener_quota)) { + int_fast32_t concurrent = isc__nm_tcp_listener_nactive( + handle->sock->server->parent); + assert_true(concurrent >= 0); + assert_true((uint_fast32_t)concurrent <= + isc_quota_getmax(&listener_quota)); + P(concurrent); + } + + isc_nmhandle_attach(handle, &readhandle); + isc_nm_read(handle, tcp_listen_read_cb, NULL); + + return (ISC_R_SUCCESS); +} + +int +main(void) { + const struct CMUnitTest tests[] = { + cmocka_unit_test_setup_teardown(tcp_recv_send_quota, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(tcp_recv_half_send_quota, + nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown(tcp_half_recv_send_quota, + nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown(tcp_half_recv_half_send_quota, + nm_setup, nm_teardown) + }; + + return (cmocka_run_group_tests(tests, _setup, _teardown)); +} + +#else /* HAVE_CMOCKA */ + +#include + +int +main(void) { + printf("1..0 # Skipped: cmocka not available\n"); + return (0); +} + +#endif /* if HAVE_CMOCKA */ diff --git a/lib/isc/tests/netmgr_test.c b/lib/isc/tests/tcp_test.c similarity index 59% rename from lib/isc/tests/netmgr_test.c rename to lib/isc/tests/tcp_test.c index 268e5199b1..a27ab80ebb 100644 --- a/lib/isc/tests/netmgr_test.c +++ b/lib/isc/tests/tcp_test.c @@ -20,8 +20,6 @@ #include #include -#if 
UV_VERSION_MAJOR > 1 || (UV_VERSION_MAJOR == 1 && UV_VERSION_MINOR >= 27) - #define UNIT_TESTING #include @@ -36,15 +34,18 @@ #include #include +#include "uv_wrap.h" +#define KEEP_BEFORE + #include "../netmgr/netmgr-int.h" +#include "../netmgr/tcp.c" +#include "../netmgr/uv-compat.c" +#include "../netmgr/uv-compat.h" #include "isctest.h" #define MAX_NM 2 -static isc_sockaddr_t udp_listen_addr; -static isc_sockaddr_t udp_connect_addr; static isc_sockaddr_t tcp_listen_addr; -static isc_sockaddr_t tcp_connect_addr; static uint64_t send_magic = 0; static uint64_t stop_magic = 0; @@ -64,11 +65,27 @@ static atomic_uint_fast64_t csends; static atomic_uint_fast64_t creads; static atomic_uint_fast64_t ctimeouts; -static unsigned int workers = 2; +static unsigned int workers = 3; + +static bool reuse_supported = true; #define NSENDS 100 #define NWRITES 10 +#define CHECK_RANGE_FULL(v) \ + { \ + int __v = atomic_load(&v); \ + assert_true(__v > NSENDS * NWRITES * 10 / 100); \ + assert_true(__v <= NSENDS * NWRITES * 110 / 100); \ + } + +#define CHECK_RANGE_HALF(v) \ + { \ + int __v = atomic_load(&v); \ + assert_true(__v > NSENDS * NWRITES * 5 / 100); \ + assert_true(__v <= NSENDS * NWRITES * 110 / 100); \ + } + /* Enable this to print values while running tests */ #undef PRINT_DEBUG #ifdef PRINT_DEBUG @@ -124,6 +141,9 @@ setup_ephemeral_port(isc_sockaddr_t *addr, sa_family_t family) { close(fd); return (-1); } + if (result == ISC_R_NOTIMPLEMENTED) { + reuse_supported = false; + } #if IPV6_RECVERR #define setsockopt_on(socket, level, name) \ @@ -166,15 +186,6 @@ _teardown(void **state) { /* Generic */ -static void -noop_recv_cb(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, - void *cbarg) { - UNUSED(handle); - UNUSED(eresult); - UNUSED(region); - UNUSED(cbarg); -} - static unsigned int noop_accept_cb(isc_nmhandle_t *handle, unsigned int result, void *cbarg) { UNUSED(handle); @@ -197,18 +208,9 @@ thread_local size_t tcp_buffer_length = 0; static int 
nm_setup(void **state) { size_t nworkers = ISC_MAX(ISC_MIN(workers, 32), 1); - int udp_listen_sock = -1; int tcp_listen_sock = -1; isc_nm_t **nm = NULL; - udp_listen_addr = (isc_sockaddr_t){ .length = 0 }; - udp_listen_sock = setup_ephemeral_port(&udp_listen_addr, SOCK_DGRAM); - if (udp_listen_sock < 0) { - return (-1); - } - close(udp_listen_sock); - udp_listen_sock = -1; - tcp_listen_addr = (isc_sockaddr_t){ .length = 0 }; tcp_listen_sock = setup_ephemeral_port(&tcp_listen_addr, SOCK_STREAM); if (tcp_listen_sock < 0) { @@ -269,24 +271,20 @@ tcp_connect_send(isc_nmhandle_t *handle); static void tcp_connect_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, void *cbarg) { - isc_nmhandle_t *readhandle = handle; uint64_t magic = 0; UNUSED(cbarg); assert_non_null(handle); if (eresult != ISC_R_SUCCESS) { - isc_nmhandle_detach(&readhandle); - return; + goto unref; } memmove(tcp_buffer_storage + tcp_buffer_length, region->base, region->length); tcp_buffer_length += region->length; - if (tcp_buffer_length >= sizeof(magic)) { - isc_nm_pauseread(handle); - + while (tcp_buffer_length >= sizeof(magic)) { atomic_fetch_add(&creads, 1); magic = *(uint64_t *)tcp_buffer_storage; @@ -298,17 +296,19 @@ tcp_connect_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, if (magic == send_magic) { tcp_connect_send(handle); + return; } else if (magic == stop_magic) { /* We are done, so we don't send anything back */ /* There should be no more packets in the buffer */ assert_int_equal(tcp_buffer_length, 0); } } +unref: + isc_nmhandle_detach(&handle); } static void tcp_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { - isc_nmhandle_t *sendhandle = handle; assert_non_null(handle); UNUSED(cbarg); @@ -319,40 +319,35 @@ tcp_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { /* Send failed, we need to stop reading too */ isc_nm_cancelread(handle); } - - isc_nmhandle_detach(&sendhandle); } static void 
tcp_connect_shutdown(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { - isc_nmhandle_t *sendhandle = handle; UNUSED(cbarg); assert_non_null(handle); - isc_nm_cancelread(handle); - if (eresult == ISC_R_SUCCESS) { atomic_fetch_add(&csends, 1); + } else { + /* Send failed, we need to stop reading too */ + isc_nm_cancelread(handle); } - - isc_nmhandle_detach(&sendhandle); } static void tcp_connect_send(isc_nmhandle_t *handle) { - isc_nmhandle_t *sendhandle = NULL; uint_fast64_t sends = atomic_load(&nsends); while (sends > 0) { /* Continue until we subtract or we are done */ if (atomic_compare_exchange_weak(&nsends, &sends, sends - 1)) { + sends--; break; } } - isc_nmhandle_attach(handle, &sendhandle); if (sends == 0) { isc_nm_send(handle, (isc_region_t *)&stop_msg, tcp_connect_shutdown, NULL); @@ -366,6 +361,7 @@ static void tcp_connect_connect_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { isc_nmhandle_t *readhandle = NULL; + UNUSED(cbarg); if (eresult != ISC_R_SUCCESS) { @@ -376,6 +372,7 @@ tcp_connect_connect_cb(isc_nmhandle_t *handle, isc_result_t eresult, /* Continue until we subtract or we are done */ if (atomic_compare_exchange_weak(&nsends, &sends, sends - 1)) { + sends--; break; } } @@ -385,11 +382,144 @@ tcp_connect_connect_cb(isc_nmhandle_t *handle, isc_result_t eresult, atomic_fetch_add(&cconnects, 1); isc_nmhandle_attach(handle, &readhandle); - isc_nm_read(readhandle, tcp_connect_read_cb, readhandle); + isc_nm_read(handle, tcp_connect_read_cb, NULL); tcp_connect_send(handle); } +static void +mock_listentcp_uv_tcp_bind(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcp_connect_addr; + + tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_tcp_bind, UV_EADDRINUSE); + + result = isc_nm_listentcp(listen_nm, 
(isc_nmiface_t *)&tcp_listen_addr, + noop_accept_cb, NULL, 0, 0, NULL, + &listen_sock); + assert_int_not_equal(result, ISC_R_SUCCESS); + assert_null(listen_sock); + + RESET_RETURN; +} + +static void +mock_listentcp_uv_fileno(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcp_connect_addr; + + tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_fileno, UV_EADDRINUSE); + + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + noop_accept_cb, NULL, 0, 0, NULL, + &listen_sock); + assert_int_not_equal(result, ISC_R_SUCCESS); + assert_null(listen_sock); + + RESET_RETURN; +} + +static void +mock_listentcp_uv_tcp_getsockname(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcp_connect_addr; + + tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_tcp_getsockname, UV_EADDRINUSE); + + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + noop_accept_cb, NULL, 0, 0, NULL, + &listen_sock); + assert_int_not_equal(result, ISC_R_SUCCESS); + assert_null(listen_sock); + + RESET_RETURN; +} + +static void +mock_listentcp_uv_listen(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcp_connect_addr; + + tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_listen, UV_EADDRINUSE); + + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + noop_accept_cb, NULL, 0, 0, NULL, + &listen_sock); + 
assert_int_not_equal(result, ISC_R_SUCCESS); + assert_null(listen_sock); + + RESET_RETURN; +} + +static void +mock_tcpconnect_uv_tcp_bind(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_sockaddr_t tcp_connect_addr; + + tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_tcp_bind, UV_ENOMEM); + + result = isc_nm_tcpconnect(connect_nm, + (isc_nmiface_t *)&tcp_connect_addr, + (isc_nmiface_t *)&tcp_listen_addr, + noop_connect_cb, NULL, 1000, 0); + assert_int_not_equal(result, ISC_R_SUCCESS); + + isc_nm_closedown(connect_nm); + + RESET_RETURN; +} + +static void +mock_tcpconnect_uv_tcp_connect(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_sockaddr_t tcp_connect_addr; + + tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_tcp_connect, UV_ENOMEM); + + result = isc_nm_tcpconnect( + connect_nm, (isc_nmiface_t *)&tcp_connect_addr, + (isc_nmiface_t *)&tcp_listen_addr, noop_connect_cb, NULL, 1, 0); + assert_int_not_equal(result, ISC_R_SUCCESS); + + isc_nm_closedown(connect_nm); + + RESET_RETURN; +} + static void tcp_noop(void **state) { isc_nm_t **nm = (isc_nm_t **)*state; @@ -397,12 +527,14 @@ tcp_noop(void **state) { isc_nm_t *connect_nm = nm[1]; isc_result_t result = ISC_R_SUCCESS; isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcp_connect_addr; tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); - result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, - noop_recv_cb, NULL, 0, &listen_sock); + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + noop_accept_cb, NULL, 0, 0, NULL, + &listen_sock); assert_int_equal(result, ISC_R_SUCCESS); 
isc_nm_stoplistening(listen_sock); @@ -430,6 +562,7 @@ tcp_noresponse(void **state) { isc_nm_t *connect_nm = nm[1]; isc_result_t result = ISC_R_SUCCESS; isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcp_connect_addr; tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); @@ -455,6 +588,7 @@ tcp_listen_accept_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg); static isc_threadresult_t tcp_connect_thread(isc_threadarg_t arg) { isc_nm_t *connect_nm = (isc_nm_t *)arg; + isc_sockaddr_t tcp_connect_addr; tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); @@ -469,6 +603,115 @@ tcp_connect_thread(isc_threadarg_t arg) { return ((isc_threadresult_t)0); } +static void +tcp_recv_one(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcp_connect_addr; + + tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); + + atomic_store(&nsends, 1); + + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + tcp_listen_accept_cb, NULL, 0, 0, NULL, + &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + (void)isc_nm_tcpconnect(connect_nm, (isc_nmiface_t *)&tcp_connect_addr, + (isc_nmiface_t *)&tcp_listen_addr, + tcp_connect_connect_cb, NULL, 1000, 0); + + while (atomic_load(&nsends) > 0) { + isc_thread_yield(); + } + + while (atomic_load(&cconnects) != 1 || atomic_load(&ssends) != 0 || + atomic_load(&sreads) != 1 || atomic_load(&creads) != 0 || + atomic_load(&csends) != 1) + { + isc_thread_yield(); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + isc_nm_closedown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); 
+ X(ssends); + + assert_int_equal(atomic_load(&cconnects), 1); + assert_int_equal(atomic_load(&csends), 1); + assert_int_equal(atomic_load(&creads), 0); + assert_int_equal(atomic_load(&ctimeouts), 0); + assert_int_equal(atomic_load(&sreads), 1); + assert_int_equal(atomic_load(&ssends), 0); +} + +static void +tcp_recv_two(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcp_connect_addr; + + tcp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcp_connect_addr, &in6addr_loopback, 0); + + atomic_store(&nsends, 2); + + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, + tcp_listen_accept_cb, NULL, 0, 0, NULL, + &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + result = isc_nm_tcpconnect(connect_nm, + (isc_nmiface_t *)&tcp_connect_addr, + (isc_nmiface_t *)&tcp_listen_addr, + tcp_connect_connect_cb, NULL, 1000, 0); + assert_int_equal(result, ISC_R_SUCCESS); + + while (atomic_load(&nsends) > 0) { + isc_thread_yield(); + } + + while (atomic_load(&sreads) < 2 || atomic_load(&ssends) < 1 || + atomic_load(&csends) < 2 || atomic_load(&creads) < 1) + { + isc_thread_yield(); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + isc_nm_closedown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + assert_int_equal(atomic_load(&cconnects), 1); + assert_true(atomic_load(&csends) >= 2); + assert_int_equal(atomic_load(&creads), 1); + assert_int_equal(atomic_load(&ctimeouts), 0); + assert_true(atomic_load(&sreads) >= 2); + assert_int_equal(atomic_load(&ssends), 1); +} + static void tcp_recv_send(void **state) { isc_nm_t **nm = (isc_nm_t **)*state; @@ -479,6 +722,11 @@ tcp_recv_send(void **state) { size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); isc_thread_t 
threads[32] = { 0 }; + if (!reuse_supported) { + skip(); + return; + } + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, tcp_listen_accept_cb, NULL, 0, 0, NULL, &listen_sock); @@ -504,11 +752,10 @@ tcp_recv_send(void **state) { X(sreads); X(ssends); - /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ - assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); - /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ - assert_true(atomic_load(&creads) <= atomic_load(&csends)); - assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); + CHECK_RANGE_FULL(csends); + CHECK_RANGE_FULL(creads); + CHECK_RANGE_FULL(sreads); + CHECK_RANGE_FULL(ssends); } static void @@ -521,6 +768,11 @@ tcp_recv_half_send(void **state) { size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); isc_thread_t threads[32] = { 0 }; + if (!reuse_supported) { + skip(); + return; + } + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, tcp_listen_accept_cb, NULL, 0, 0, NULL, &listen_sock); @@ -551,11 +803,10 @@ tcp_recv_half_send(void **state) { X(sreads); X(ssends); - /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ - assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); - /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ - assert_true(atomic_load(&creads) <= atomic_load(&csends)); - assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); + CHECK_RANGE_HALF(csends); + CHECK_RANGE_HALF(creads); + CHECK_RANGE_HALF(sreads); + CHECK_RANGE_HALF(ssends); } static void @@ -568,6 +819,11 @@ tcp_half_recv_send(void **state) { size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); isc_thread_t threads[32] = { 0 }; + if (!reuse_supported) { + skip(); + return; + } + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, tcp_listen_accept_cb, NULL, 0, 0, NULL, &listen_sock); @@ -598,11 +854,10 @@ tcp_half_recv_send(void **state) { X(sreads); X(ssends); - /* 
assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ - assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); - /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ - assert_true(atomic_load(&creads) <= atomic_load(&csends)); - assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); + CHECK_RANGE_HALF(csends); + CHECK_RANGE_HALF(creads); + CHECK_RANGE_HALF(sreads); + CHECK_RANGE_HALF(ssends); } static void @@ -615,6 +870,11 @@ tcp_half_recv_half_send(void **state) { size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); isc_thread_t threads[32] = { 0 }; + if (!reuse_supported) { + skip(); + return; + } + result = isc_nm_listentcp(listen_nm, (isc_nmiface_t *)&tcp_listen_addr, tcp_listen_accept_cb, NULL, 0, 0, NULL, &listen_sock); @@ -644,11 +904,10 @@ tcp_half_recv_half_send(void **state) { X(sreads); X(ssends); - /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ - assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); - /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ - assert_true(atomic_load(&creads) <= atomic_load(&csends)); - assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); + CHECK_RANGE_HALF(csends); + CHECK_RANGE_HALF(creads); + CHECK_RANGE_HALF(sreads); + CHECK_RANGE_HALF(ssends); } /* TCP Listener */ @@ -665,32 +924,30 @@ tcp_listen_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, static void tcp_listen_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { - isc_nmhandle_t *sendhandle = (isc_nmhandle_t *)cbarg; - UNUSED(eresult); + UNUSED(cbarg); assert_non_null(handle); if (eresult == ISC_R_SUCCESS) { atomic_fetch_add(&ssends, 1); isc_nm_resumeread(handle); + } else { + isc_nm_cancelread(handle); } - - isc_nmhandle_detach(&sendhandle); } static void tcp_listen_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, void *cbarg) { - isc_nmhandle_t *readhandle = (isc_nmhandle_t *)cbarg; - isc_nmhandle_t *sendhandle = NULL; uint64_t magic = 0; + 
UNUSED(cbarg); + assert_non_null(handle); if (eresult != ISC_R_SUCCESS) { - isc_nmhandle_detach(&readhandle); - return; + goto unref; } atomic_fetch_add(&sreads, 1); @@ -699,9 +956,7 @@ tcp_listen_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, region->length); tcp_buffer_length += region->length; - if (tcp_buffer_length >= sizeof(magic)) { - isc_nm_pauseread(handle); - + while (tcp_buffer_length >= sizeof(magic)) { magic = *(uint64_t *)tcp_buffer_storage; assert_true(magic == stop_magic || magic == send_magic); @@ -710,15 +965,17 @@ tcp_listen_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, tcp_buffer_length); if (magic == send_magic) { - isc_nmhandle_attach(handle, &sendhandle); - isc_nm_send(handle, region, tcp_listen_send_cb, - sendhandle); + isc_nm_send(handle, region, tcp_listen_send_cb, NULL); + return; } else if (magic == stop_magic) { /* We are done, so we don't send anything back */ /* There should be no more packets in the buffer */ assert_int_equal(tcp_buffer_length, 0); } } + +unref: + isc_nmhandle_detach(&handle); } static isc_result_t @@ -736,439 +993,35 @@ tcp_listen_accept_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { /* atomic_fetch_add(&saccept, 1); */ isc_nmhandle_attach(handle, &readhandle); - isc_nm_read(readhandle, tcp_listen_read_cb, readhandle); + isc_nm_read(handle, tcp_listen_read_cb, NULL); return (ISC_R_SUCCESS); } -/* UDP */ - -static void -udp_listen_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { - isc_nmhandle_t *sendhandle = handle; - assert_non_null(handle); - UNUSED(cbarg); - - if (eresult == ISC_R_SUCCESS) { - atomic_fetch_add(&ssends, 1); - } - - isc_nmhandle_detach(&sendhandle); -} - -static void -udp_listen_recv_cb(isc_nmhandle_t *handle, isc_result_t eresult, - isc_region_t *region, void *cbarg) { - isc_nmhandle_t *sendhandle = NULL; - uint64_t magic = 0; - - assert_null(cbarg); - - if (eresult != ISC_R_SUCCESS) { - return; - } - - assert_int_equal(region->length, 
sizeof(send_magic)); - atomic_fetch_add(&sreads, 1); - magic = *(uint64_t *)region->base; - - assert_true(magic == stop_magic || magic == send_magic); - if (magic == send_magic) { - isc_nmhandle_attach(handle, &sendhandle); - isc_nm_send(sendhandle, region, udp_listen_send_cb, NULL); - } else if (magic == stop_magic) { - /* We are done */ - } -} - -static void -udp_noop(void **state) { - isc_nm_t **nm = (isc_nm_t **)*state; - isc_nm_t *listen_nm = nm[0]; - isc_nm_t *connect_nm = nm[1]; - isc_result_t result = ISC_R_SUCCESS; - isc_nmsocket_t *listen_sock = NULL; - - udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; - isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); - - result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, - noop_recv_cb, NULL, 0, &listen_sock); - assert_int_equal(result, ISC_R_SUCCESS); - - isc_nm_stoplistening(listen_sock); - isc_nmsocket_close(&listen_sock); - assert_null(listen_sock); - - (void)isc_nm_udpconnect(connect_nm, (isc_nmiface_t *)&udp_connect_addr, - (isc_nmiface_t *)&udp_listen_addr, - noop_connect_cb, NULL, 1, 0); - - isc_nm_closedown(connect_nm); - - assert_int_equal(0, atomic_load(&cconnects)); - assert_int_equal(0, atomic_load(&csends)); - assert_int_equal(0, atomic_load(&creads)); - assert_int_equal(0, atomic_load(&ctimeouts)); - assert_int_equal(0, atomic_load(&sreads)); - assert_int_equal(0, atomic_load(&ssends)); -} - -static void -udp_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg); -static void -udp_connect_recv_cb(isc_nmhandle_t *handle, isc_result_t eresult, - isc_region_t *region, void *cbarg); - -static void -udp_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { - isc_nmhandle_t *sendhandle = handle; - - assert_non_null(handle); - - UNUSED(eresult); - UNUSED(cbarg); - - atomic_fetch_add(&csends, 1); - isc_nmhandle_detach(&sendhandle); -} - -static void -udp_connect_send(isc_nmhandle_t *handle) { - isc_nmhandle_t *sendhandle = NULL; - 
uint_fast64_t sends = atomic_load(&nsends); - - while (sends > 0) { - /* Continue until we subtract or we are done */ - if (atomic_compare_exchange_weak(&nsends, &sends, sends - 1)) { - break; - } - } - - isc_nmhandle_attach(handle, &sendhandle); - isc_nm_send(handle, (isc_region_t *)&stop_msg, udp_connect_send_cb, - sendhandle); -} - -static void -udp_connect_recv_cb(isc_nmhandle_t *handle, isc_result_t eresult, - isc_region_t *region, void *cbarg) { - isc_nmhandle_t *readhandle = handle; - uint64_t magic = 0; - - UNUSED(cbarg); - - assert_non_null(handle); - - if (eresult != ISC_R_SUCCESS) { - isc_nmhandle_detach(&readhandle); - return; - } - - assert_int_equal(region->length, sizeof(magic)); - - atomic_fetch_add(&creads, 1); - - magic = *(uint64_t *)region->base; - - assert_true(magic == stop_magic || magic == send_magic); - - isc_nmhandle_detach(&readhandle); -} - -static void -udp_connect_connect_cb(isc_nmhandle_t *handle, isc_result_t eresult, - void *cbarg) { - isc_nmhandle_t *readhandle = NULL; - UNUSED(cbarg); - - if (eresult != ISC_R_SUCCESS) { - uint_fast64_t sends = atomic_load(&nsends); - - /* We failed to connect; try again */ - while (sends > 0) { - /* Continue until we subtract or we are done */ - if (atomic_compare_exchange_weak(&nsends, &sends, - sends - 1)) { - break; - } - } - return; - } - - atomic_fetch_add(&cconnects, 1); - - isc_nmhandle_attach(handle, &readhandle); - isc_nm_read(readhandle, udp_connect_recv_cb, readhandle); - - udp_connect_send(handle); -} - -static isc_threadresult_t -udp_connect_thread(isc_threadarg_t arg) { - isc_nm_t *connect_nm = (isc_nm_t *)arg; - - udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; - isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); - - while (atomic_load(&nsends) > 0) { - (void)isc_nm_udpconnect(connect_nm, - (isc_nmiface_t *)&udp_connect_addr, - (isc_nmiface_t *)&udp_listen_addr, - udp_connect_connect_cb, NULL, 1, 0); - } - return ((isc_threadresult_t)0); -} - -static void 
-udp_noresponse(void **state) { - isc_nm_t **nm = (isc_nm_t **)*state; - isc_nm_t *listen_nm = nm[0]; - isc_nm_t *connect_nm = nm[1]; - isc_result_t result = ISC_R_SUCCESS; - isc_nmsocket_t *listen_sock = NULL; - - udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; - isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); - - result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, - noop_recv_cb, NULL, 0, &listen_sock); - assert_int_equal(result, ISC_R_SUCCESS); - - (void)isc_nm_udpconnect(connect_nm, (isc_nmiface_t *)&udp_connect_addr, - (isc_nmiface_t *)&udp_listen_addr, - udp_connect_connect_cb, NULL, 1, 0); - - isc_nm_stoplistening(listen_sock); - isc_nmsocket_close(&listen_sock); - assert_null(listen_sock); - isc_nm_closedown(connect_nm); - - X(cconnects); - X(csends); - X(creads); - X(ctimeouts); - X(sreads); - X(ssends); - - assert_int_equal(1, atomic_load(&cconnects)); - assert_true(atomic_load(&csends) <= 1); - assert_int_equal(0, atomic_load(&creads)); - assert_int_equal(0, atomic_load(&ctimeouts)); - assert_int_equal(0, atomic_load(&sreads)); - assert_int_equal(0, atomic_load(&ssends)); -} - -static void -udp_recv_send(void **state) { - isc_nm_t **nm = (isc_nm_t **)*state; - isc_nm_t *listen_nm = nm[0]; - isc_nm_t *connect_nm = nm[1]; - isc_result_t result = ISC_R_SUCCESS; - isc_nmsocket_t *listen_sock = NULL; - size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); - isc_thread_t threads[32] = { 0 }; - - result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, - udp_listen_recv_cb, NULL, 0, &listen_sock); - assert_int_equal(result, ISC_R_SUCCESS); - - for (size_t i = 0; i < nthreads; i++) { - isc_thread_create(udp_connect_thread, connect_nm, &threads[i]); - } - - for (size_t i = 0; i < nthreads; i++) { - isc_thread_join(threads[i], NULL); - } - - isc_nm_stoplistening(listen_sock); - isc_nmsocket_close(&listen_sock); - assert_null(listen_sock); - - isc_nm_closedown(connect_nm); - - X(cconnects); - X(csends); - 
X(creads); - X(ctimeouts); - X(sreads); - X(ssends); - - assert_true(atomic_load(&cconnects) >= (NSENDS - 1) * NWRITES); - assert_true(atomic_load(&csends) <= atomic_load(&cconnects)); - - /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ - assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); - /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ - assert_true(atomic_load(&creads) <= atomic_load(&csends)); - assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); -} - -static void -udp_recv_half_send(void **state) { - isc_nm_t **nm = (isc_nm_t **)*state; - isc_nm_t *listen_nm = nm[0]; - isc_nm_t *connect_nm = nm[1]; - isc_result_t result = ISC_R_SUCCESS; - isc_nmsocket_t *listen_sock = NULL; - size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); - isc_thread_t threads[32] = { 0 }; - - result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, - udp_listen_recv_cb, NULL, 0, &listen_sock); - assert_int_equal(result, ISC_R_SUCCESS); - - for (size_t i = 0; i < nthreads; i++) { - isc_thread_create(udp_connect_thread, connect_nm, &threads[i]); - } - - while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { - isc_thread_yield(); - } - - isc_nm_closedown(connect_nm); - - for (size_t i = 0; i < nthreads; i++) { - isc_thread_join(threads[i], NULL); - } - - isc_nm_stoplistening(listen_sock); - isc_nmsocket_close(&listen_sock); - assert_null(listen_sock); - - X(cconnects); - X(csends); - X(creads); - X(ctimeouts); - X(sreads); - X(ssends); - - assert_true(atomic_load(&cconnects) >= (NSENDS - 1) * NWRITES); - assert_true(atomic_load(&csends) <= atomic_load(&cconnects)); - - /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ - assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); - /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ - assert_true(atomic_load(&creads) <= atomic_load(&csends)); - assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); -} - -static void 
-udp_half_recv_send(void **state) { - isc_nm_t **nm = (isc_nm_t **)*state; - isc_nm_t *listen_nm = nm[0]; - isc_nm_t *connect_nm = nm[1]; - isc_result_t result = ISC_R_SUCCESS; - isc_nmsocket_t *listen_sock = NULL; - size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); - isc_thread_t threads[32] = { 0 }; - - result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, - udp_listen_recv_cb, NULL, 0, &listen_sock); - assert_int_equal(result, ISC_R_SUCCESS); - - for (size_t i = 0; i < nthreads; i++) { - isc_thread_create(udp_connect_thread, connect_nm, &threads[i]); - } - - while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { - isc_thread_yield(); - } - - isc_nm_stoplistening(listen_sock); - isc_nmsocket_close(&listen_sock); - assert_null(listen_sock); - - for (size_t i = 0; i < nthreads; i++) { - isc_thread_join(threads[i], NULL); - } - - isc_nm_closedown(connect_nm); - - X(cconnects); - X(csends); - X(creads); - X(ctimeouts); - X(sreads); - X(ssends); - - assert_true(atomic_load(&cconnects) >= (NSENDS - 1) * NWRITES); - assert_true(atomic_load(&csends) <= atomic_load(&cconnects)); - - /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ - assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); - /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ - assert_true(atomic_load(&creads) <= atomic_load(&csends)); - assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); -} - -static void -udp_half_recv_half_send(void **state) { - isc_nm_t **nm = (isc_nm_t **)*state; - isc_nm_t *listen_nm = nm[0]; - isc_nm_t *connect_nm = nm[1]; - isc_result_t result = ISC_R_SUCCESS; - isc_nmsocket_t *listen_sock = NULL; - size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); - isc_thread_t threads[32] = { 0 }; - - result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, - udp_listen_recv_cb, NULL, 0, &listen_sock); - assert_int_equal(result, ISC_R_SUCCESS); - - for (size_t i = 0; i < nthreads; i++) { - 
isc_thread_create(udp_connect_thread, connect_nm, &threads[i]); - } - - while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { - isc_thread_yield(); - } - - isc_nm_closedown(connect_nm); - isc_nm_stoplistening(listen_sock); - isc_nmsocket_close(&listen_sock); - assert_null(listen_sock); - - for (size_t i = 0; i < nthreads; i++) { - isc_thread_join(threads[i], NULL); - } - - X(cconnects); - X(csends); - X(creads); - X(ctimeouts); - X(sreads); - X(ssends); - - assert_true(atomic_load(&cconnects) >= (NSENDS - 1) * NWRITES); - assert_true(atomic_load(&csends) <= atomic_load(&cconnects)); - - /* assert_true(atomic_load(&csends) >= atomic_load(&sreads)); */ - assert_true(atomic_load(&sreads) >= atomic_load(&ssends)); - /* assert_true(atomic_load(&ssends) >= atomic_load(&creads)); */ - assert_true(atomic_load(&creads) <= atomic_load(&csends)); - assert_true(atomic_load(&creads) >= atomic_load(&ctimeouts)); -} - int main(void) { const struct CMUnitTest tests[] = { - cmocka_unit_test_setup_teardown(udp_noop, nm_setup, - nm_teardown), - cmocka_unit_test_setup_teardown(udp_noresponse, nm_setup, - nm_teardown), - cmocka_unit_test_setup_teardown(udp_recv_send, nm_setup, - nm_teardown), - cmocka_unit_test_setup_teardown(udp_recv_half_send, nm_setup, - nm_teardown), - cmocka_unit_test_setup_teardown(udp_half_recv_send, nm_setup, - nm_teardown), - cmocka_unit_test_setup_teardown(udp_half_recv_half_send, + cmocka_unit_test_setup_teardown(mock_listentcp_uv_tcp_bind, + nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown(mock_listentcp_uv_fileno, + nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown( + mock_listentcp_uv_tcp_getsockname, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(mock_listentcp_uv_listen, + nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown(mock_tcpconnect_uv_tcp_bind, + nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown(mock_tcpconnect_uv_tcp_connect, nm_setup, nm_teardown), cmocka_unit_test_setup_teardown(tcp_noop, 
nm_setup, nm_teardown), cmocka_unit_test_setup_teardown(tcp_noresponse, nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown(tcp_recv_one, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(tcp_recv_two, nm_setup, + nm_teardown), cmocka_unit_test_setup_teardown(tcp_recv_send, nm_setup, nm_teardown), cmocka_unit_test_setup_teardown(tcp_recv_half_send, nm_setup, @@ -1182,18 +1035,7 @@ main(void) { return (cmocka_run_group_tests(tests, _setup, _teardown)); } -#else /* HAVE_UV_UDP_CONNECT */ - -#include - -int -main(void) { - printf("1..0 # Skipped: libuv >= 1.27 not available\n"); - return (0); -} - -#endif /* HAVE_UV_UDP_CONNECT */ -#else /* HAVE_CMOCKA */ +#else /* HAVE_CMOCKA */ #include diff --git a/lib/isc/tests/tcpdns_test.c b/lib/isc/tests/tcpdns_test.c new file mode 100644 index 0000000000..2145964592 --- /dev/null +++ b/lib/isc/tests/tcpdns_test.c @@ -0,0 +1,879 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. 
+ */ + +#if HAVE_CMOCKA +#include /* IWYU pragma: keep */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define UNIT_TESTING +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../netmgr/netmgr-int.h" +#include "isctest.h" + +#define MAX_NM 2 + +static isc_sockaddr_t tcpdns_listen_addr; + +static uint64_t send_magic = 0; +static uint64_t stop_magic = 0; + +static uv_buf_t send_msg = { .base = (char *)&send_magic, + .len = sizeof(send_magic) }; + +static uv_buf_t stop_msg = { .base = (char *)&stop_magic, + .len = sizeof(stop_magic) }; + +static atomic_uint_fast64_t nsends; + +static atomic_uint_fast64_t ssends; +static atomic_uint_fast64_t sreads; + +static atomic_uint_fast64_t cconnects; +static atomic_uint_fast64_t csends; +static atomic_uint_fast64_t creads; +static atomic_uint_fast64_t ctimeouts; + +static unsigned int workers = 3; + +static bool reuse_supported = true; + +#define NSENDS 100 +#define NWRITES 10 + +#define CHECK_RANGE_FULL(v) \ + { \ + int __v = atomic_load(&v); \ + assert_true(__v > NSENDS * NWRITES * 10 / 100); \ + assert_true(__v <= NSENDS * NWRITES * 110 / 100); \ + } + +#define CHECK_RANGE_HALF(v) \ + { \ + int __v = atomic_load(&v); \ + assert_true(__v > NSENDS * NWRITES * 5 / 100); \ + assert_true(__v <= NSENDS * NWRITES * 60 / 100); \ + } + +/* Enable this to print values while running tests */ +#undef PRINT_DEBUG +#ifdef PRINT_DEBUG +#define X(v) fprintf(stderr, #v " = %" PRIu64 "\n", atomic_load(&v)) +#else +#define X(v) +#endif + +static int +setup_ephemeral_port(isc_sockaddr_t *addr, sa_family_t family) { + isc_result_t result; + socklen_t addrlen = sizeof(*addr); + int fd; + int r; + + isc_sockaddr_fromin6(addr, &in6addr_loopback, 0); + + fd = socket(AF_INET6, family, 0); + if (fd < 0) { + perror("setup_ephemeral_port: socket()"); + return (-1); + } + + r = bind(fd, (const struct sockaddr *)&addr->type.sa, + 
sizeof(addr->type.sin6)); + if (r != 0) { + perror("setup_ephemeral_port: bind()"); + close(fd); + return (r); + } + + r = getsockname(fd, (struct sockaddr *)&addr->type.sa, &addrlen); + if (r != 0) { + perror("setup_ephemeral_port: getsockname()"); + close(fd); + return (r); + } + + result = isc__nm_socket_reuse(fd); + if (result != ISC_R_SUCCESS && result != ISC_R_NOTIMPLEMENTED) { + fprintf(stderr, + "setup_ephemeral_port: isc__nm_socket_reuse(): %s", + isc_result_totext(result)); + close(fd); + return (-1); + } + + result = isc__nm_socket_reuse_lb(fd); + if (result != ISC_R_SUCCESS && result != ISC_R_NOTIMPLEMENTED) { + fprintf(stderr, + "setup_ephemeral_port: isc__nm_socket_reuse_lb(): %s", + isc_result_totext(result)); + close(fd); + return (-1); + } + if (result == ISC_R_NOTIMPLEMENTED) { + reuse_supported = false; + } + +#if IPV6_RECVERR +#define setsockopt_on(socket, level, name) \ + setsockopt(socket, level, name, &(int){ 1 }, sizeof(int)) + + r = setsockopt_on(fd, IPPROTO_IPV6, IPV6_RECVERR); + if (r != 0) { + perror("setup_ephemeral_port"); + close(fd); + return (r); + } +#endif + + return (fd); +} + +static int +_setup(void **state) { + UNUSED(state); + + /* workers = isc_os_ncpus(); */ + + if (isc_test_begin(NULL, true, workers) != ISC_R_SUCCESS) { + return (-1); + } + + signal(SIGPIPE, SIG_IGN); + + return (0); +} + +static int +_teardown(void **state) { + UNUSED(state); + + isc_test_end(); + + return (0); +} + +/* Generic */ + +static void +noop_recv_cb(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, + void *cbarg) { + UNUSED(handle); + UNUSED(eresult); + UNUSED(region); + UNUSED(cbarg); +} + +static unsigned int +noop_accept_cb(isc_nmhandle_t *handle, unsigned int result, void *cbarg) { + UNUSED(handle); + UNUSED(result); + UNUSED(cbarg); + + return (0); +} + +static void +noop_connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + UNUSED(handle); + UNUSED(result); + UNUSED(cbarg); +} + +thread_local uint8_t 
tcpdns_buffer_storage[4096]; +thread_local size_t tcpdns_buffer_length = 0; + +static int +nm_setup(void **state) { + size_t nworkers = ISC_MAX(ISC_MIN(workers, 32), 1); + int tcpdns_listen_sock = -1; + isc_nm_t **nm = NULL; + + tcpdns_listen_addr = (isc_sockaddr_t){ .length = 0 }; + tcpdns_listen_sock = setup_ephemeral_port(&tcpdns_listen_addr, + SOCK_STREAM); + if (tcpdns_listen_sock < 0) { + return (-1); + } + close(tcpdns_listen_sock); + tcpdns_listen_sock = -1; + + atomic_store(&nsends, NSENDS * NWRITES); + + atomic_store(&csends, 0); + atomic_store(&creads, 0); + atomic_store(&sreads, 0); + atomic_store(&ssends, 0); + atomic_store(&ctimeouts, 0); + atomic_store(&cconnects, 0); + + isc_nonce_buf(&send_magic, sizeof(send_magic)); + isc_nonce_buf(&stop_magic, sizeof(stop_magic)); + if (send_magic == stop_magic) { + return (-1); + } + + nm = isc_mem_get(test_mctx, MAX_NM * sizeof(nm[0])); + for (size_t i = 0; i < MAX_NM; i++) { + nm[i] = isc_nm_start(test_mctx, nworkers); + assert_non_null(nm[i]); + isc_nm_settimeouts(nm[i], 1000, 1000, 1000, 1000); + } + + *state = nm; + + return (0); +} + +static int +nm_teardown(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + + for (size_t i = 0; i < MAX_NM; i++) { + isc_nm_destroy(&nm[i]); + assert_null(nm[i]); + } + isc_mem_put(test_mctx, nm, MAX_NM * sizeof(nm[0])); + + return (0); +} + +thread_local size_t nwrites = NWRITES; + +/* TCPDNS */ + +static void +tcpdns_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, + void *cbarg); + +static void +tcpdns_connect_send(isc_nmhandle_t *handle); + +static void +tcpdns_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, + void *cbarg) { + assert_non_null(handle); + + UNUSED(cbarg); + + if (eresult == ISC_R_SUCCESS) { + atomic_fetch_add(&csends, 1); + } else { + /* Send failed, we need to stop reading too */ + isc_nm_cancelread(handle); + } +} + +static void +tcpdns_connect_send(isc_nmhandle_t *handle) { + uint_fast64_t sends = atomic_load(&nsends); + 
+ /* Continue until we subtract or we are sent them all */ + while (sends > 0) { + if (atomic_compare_exchange_weak(&nsends, &sends, sends - 1)) { + sends--; + break; + } + } + + if (sends == 0) { + isc_nm_send(handle, (isc_region_t *)&stop_msg, + tcpdns_connect_send_cb, NULL); + } else { + isc_nm_send(handle, (isc_region_t *)&send_msg, + tcpdns_connect_send_cb, NULL); + } +} + +static void +tcpdns_connect_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg) { + uint64_t magic = 0; + + UNUSED(cbarg); + + assert_non_null(handle); + + if (eresult != ISC_R_SUCCESS) { + goto unref; + } + + assert_int_equal(region->length, sizeof(magic)); + + atomic_fetch_add(&creads, 1); + + magic = *(uint64_t *)region->base; + + assert_true(magic == stop_magic || magic == send_magic); + +unref: + isc_nmhandle_detach(&handle); +} + +static void +tcpdns_connect_connect_cb(isc_nmhandle_t *handle, isc_result_t eresult, + void *cbarg) { + isc_nmhandle_t *readhandle = NULL; + UNUSED(cbarg); + + if (eresult != ISC_R_SUCCESS) { + uint_fast64_t sends = atomic_load(&nsends); + + /* We failed to connect; try again */ + while (sends > 0) { + /* Continue until we subtract or we are done */ + if (atomic_compare_exchange_weak(&nsends, &sends, + sends - 1)) { + sends--; + break; + } + } + return; + } + + atomic_fetch_add(&cconnects, 1); + + isc_nmhandle_attach(handle, &readhandle); + isc_nm_read(handle, tcpdns_connect_read_cb, NULL); + + tcpdns_connect_send(handle); +} + +static void +tcpdns_noop(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcpdns_connect_addr; + + tcpdns_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcpdns_connect_addr, &in6addr_loopback, 0); + + result = isc_nm_listentcpdns( + listen_nm, (isc_nmiface_t *)&tcpdns_listen_addr, noop_recv_cb, + NULL, 
noop_accept_cb, NULL, 0, 0, NULL, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + (void)isc_nm_tcpdnsconnect(connect_nm, + (isc_nmiface_t *)&tcpdns_connect_addr, + (isc_nmiface_t *)&tcpdns_listen_addr, + noop_connect_cb, NULL, 1000, 0); + isc_nm_closedown(connect_nm); + + assert_int_equal(0, atomic_load(&cconnects)); + assert_int_equal(0, atomic_load(&csends)); + assert_int_equal(0, atomic_load(&creads)); + assert_int_equal(0, atomic_load(&ctimeouts)); + assert_int_equal(0, atomic_load(&sreads)); + assert_int_equal(0, atomic_load(&ssends)); +} + +static void +tcpdns_noresponse(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcpdns_connect_addr; + + tcpdns_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcpdns_connect_addr, &in6addr_loopback, 0); + + result = isc_nm_listentcpdns( + listen_nm, (isc_nmiface_t *)&tcpdns_listen_addr, noop_recv_cb, + NULL, noop_accept_cb, NULL, 0, 0, NULL, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + (void)isc_nm_tcpdnsconnect(connect_nm, + (isc_nmiface_t *)&tcpdns_connect_addr, + (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_connect_connect_cb, NULL, 1000, 0); + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + isc_nm_closedown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + assert_true(atomic_load(&cconnects) <= 1); + assert_true(atomic_load(&csends) <= 1); + assert_int_equal(0, atomic_load(&creads)); + assert_int_equal(0, atomic_load(&ctimeouts)); + assert_int_equal(0, atomic_load(&sreads)); + assert_int_equal(0, atomic_load(&ssends)); +} + +static void +tcpdns_listen_read_cb(isc_nmhandle_t *handle, isc_result_t 
eresult, + isc_region_t *region, void *cbarg); + +static void +tcpdns_listen_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, + void *cbarg) { + UNUSED(cbarg); + UNUSED(eresult); + + assert_non_null(handle); + + if (eresult != ISC_R_SUCCESS) { + return; + } + + atomic_fetch_add(&ssends, 1); +} + +static void +tcpdns_listen_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg) { + uint64_t magic = 0; + + UNUSED(cbarg); + + assert_non_null(handle); + + if (eresult != ISC_R_SUCCESS) { + return; + } + + atomic_fetch_add(&sreads, 1); + + assert_int_equal(region->length, sizeof(magic)); + + magic = *(uint64_t *)region->base; + assert_true(magic == stop_magic || magic == send_magic); + + if (magic == send_magic) { + isc_nm_send(handle, region, tcpdns_listen_send_cb, NULL); + return; + } else if (magic == stop_magic) { + /* We are done, we don't send anything back */ + } +} + +static isc_result_t +tcpdns_listen_accept_cb(isc_nmhandle_t *handle, isc_result_t eresult, + void *cbarg) { + UNUSED(handle); + UNUSED(cbarg); + + return (eresult); +} + +static isc_threadresult_t +tcpdns_connect_thread(isc_threadarg_t arg) { + isc_nm_t *connect_nm = (isc_nm_t *)arg; + isc_sockaddr_t tcpdns_connect_addr; + + tcpdns_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcpdns_connect_addr, &in6addr_loopback, 0); + + while (atomic_load(&nsends) > 0) { + (void)isc_nm_tcpdnsconnect( + connect_nm, (isc_nmiface_t *)&tcpdns_connect_addr, + (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_connect_connect_cb, NULL, 1000, 0); + } + + return ((isc_threadresult_t)0); +} + +static void +tcpdns_recv_one(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcpdns_connect_addr; + + tcpdns_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcpdns_connect_addr, 
&in6addr_loopback, 0); + + atomic_store(&nsends, 1); + + result = isc_nm_listentcpdns( + listen_nm, (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_listen_read_cb, NULL, tcpdns_listen_accept_cb, NULL, 0, + 0, NULL, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + (void)isc_nm_tcpdnsconnect(connect_nm, + (isc_nmiface_t *)&tcpdns_connect_addr, + (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_connect_connect_cb, NULL, 1000, 0); + + while (atomic_load(&nsends) > 0) { + isc_thread_yield(); + } + + while (atomic_load(&cconnects) != 1 || atomic_load(&ssends) != 0 || + atomic_load(&sreads) != 1 || atomic_load(&creads) != 0 || + atomic_load(&csends) != 1) + { + isc_thread_yield(); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + isc_nm_closedown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + assert_int_equal(atomic_load(&cconnects), 1); + assert_int_equal(atomic_load(&csends), 1); + assert_int_equal(atomic_load(&creads), 0); + assert_int_equal(atomic_load(&ctimeouts), 0); + assert_int_equal(atomic_load(&sreads), 1); + assert_int_equal(atomic_load(&ssends), 0); +} + +static void +tcpdns_recv_two(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t tcpdns_connect_addr; + + tcpdns_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&tcpdns_connect_addr, &in6addr_loopback, 0); + + atomic_store(&nsends, 2); + + result = isc_nm_listentcpdns( + listen_nm, (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_listen_read_cb, NULL, tcpdns_listen_accept_cb, NULL, 0, + 0, NULL, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + result = isc_nm_tcpdnsconnect(connect_nm, + (isc_nmiface_t *)&tcpdns_connect_addr, + (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_connect_connect_cb, 
NULL, 1000, 0); + assert_int_equal(result, ISC_R_SUCCESS); + + isc_nm_settimeouts(connect_nm, 1000, 1000, 1000, 1000); + + result = isc_nm_tcpdnsconnect(connect_nm, + (isc_nmiface_t *)&tcpdns_connect_addr, + (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_connect_connect_cb, NULL, 1000, 0); + assert_int_equal(result, ISC_R_SUCCESS); + + while (atomic_load(&nsends) > 0) { + isc_thread_yield(); + } + + while (atomic_load(&sreads) != 2 || atomic_load(&ssends) != 1 || + atomic_load(&csends) != 2 || atomic_load(&creads) != 1) + { + isc_thread_yield(); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + isc_nm_closedown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + assert_int_equal(atomic_load(&cconnects), 2); + assert_int_equal(atomic_load(&csends), 2); + assert_int_equal(atomic_load(&creads), 1); + assert_int_equal(atomic_load(&ctimeouts), 0); + assert_int_equal(atomic_load(&sreads), 2); + assert_int_equal(atomic_load(&ssends), 1); +} + +static void +tcpdns_recv_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_thread_t threads[32] = { 0 }; + + if (!reuse_supported) { + skip(); + return; + } + + result = isc_nm_listentcpdns( + listen_nm, (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_listen_read_cb, NULL, tcpdns_listen_accept_cb, NULL, 0, + 0, NULL, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(tcpdns_connect_thread, connect_nm, + &threads[i]); + } + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + isc_nm_closedown(connect_nm); + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + 
X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + CHECK_RANGE_FULL(csends); + CHECK_RANGE_FULL(creads); + CHECK_RANGE_FULL(sreads); + CHECK_RANGE_FULL(ssends); +} + +static void +tcpdns_recv_half_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_thread_t threads[32] = { 0 }; + + if (!reuse_supported) { + skip(); + return; + } + + result = isc_nm_listentcpdns( + listen_nm, (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_listen_read_cb, NULL, tcpdns_listen_accept_cb, NULL, 0, + 0, NULL, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(tcpdns_connect_thread, connect_nm, + &threads[i]); + } + + while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { + isc_thread_yield(); + } + + isc_nm_closedown(connect_nm); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + CHECK_RANGE_HALF(csends); + CHECK_RANGE_HALF(creads); + CHECK_RANGE_HALF(sreads); + CHECK_RANGE_HALF(ssends); +} + +static void +tcpdns_half_recv_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_thread_t threads[32] = { 0 }; + + if (!reuse_supported) { + skip(); + return; + } + + result = isc_nm_listentcpdns( + listen_nm, (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_listen_read_cb, NULL, tcpdns_listen_accept_cb, NULL, 0, + 0, NULL, &listen_sock); + 
assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(tcpdns_connect_thread, connect_nm, + &threads[i]); + } + + while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { + isc_thread_yield(); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + isc_nm_closedown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + CHECK_RANGE_HALF(csends); + CHECK_RANGE_HALF(creads); + CHECK_RANGE_HALF(sreads); + CHECK_RANGE_HALF(ssends); +} + +static void +tcpdns_half_recv_half_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_thread_t threads[32] = { 0 }; + + if (!reuse_supported) { + skip(); + return; + } + + result = isc_nm_listentcpdns( + listen_nm, (isc_nmiface_t *)&tcpdns_listen_addr, + tcpdns_listen_read_cb, NULL, tcpdns_listen_accept_cb, NULL, 0, + 0, NULL, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(tcpdns_connect_thread, connect_nm, + &threads[i]); + } + + while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { + isc_thread_yield(); + } + + isc_nm_closedown(connect_nm); + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + CHECK_RANGE_HALF(csends); + CHECK_RANGE_HALF(creads); + CHECK_RANGE_HALF(sreads); + CHECK_RANGE_HALF(ssends); +} + +int +main(void) { + const struct CMUnitTest tests[] = { + 
cmocka_unit_test_setup_teardown(tcpdns_recv_one, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(tcpdns_recv_two, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(tcpdns_noop, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(tcpdns_noresponse, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(tcpdns_recv_send, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(tcpdns_recv_half_send, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(tcpdns_half_recv_send, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(tcpdns_half_recv_half_send, + nm_setup, nm_teardown), + }; + + return (cmocka_run_group_tests(tests, _setup, _teardown)); +} + +#else /* HAVE_CMOCKA */ + +#include + +int +main(void) { + printf("1..0 # Skipped: cmocka not available\n"); + return (0); +} + +#endif /* if HAVE_CMOCKA */ diff --git a/lib/isc/tests/udp_test.c b/lib/isc/tests/udp_test.c new file mode 100644 index 0000000000..a1a99314e2 --- /dev/null +++ b/lib/isc/tests/udp_test.c @@ -0,0 +1,892 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. 
+ */ + +#if HAVE_CMOCKA +#include /* IWYU pragma: keep */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define UNIT_TESTING +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "uv_wrap.h" +#define KEEP_BEFORE + +#include "../netmgr/netmgr-int.h" +#include "../netmgr/udp.c" +#include "../netmgr/uv-compat.c" +#include "../netmgr/uv-compat.h" +#include "isctest.h" + +#define MAX_NM 2 + +static isc_sockaddr_t udp_listen_addr; + +static uint64_t send_magic = 0; +static uint64_t stop_magic = 0; + +static uv_buf_t send_msg = { .base = (char *)&stop_magic, + .len = sizeof(stop_magic) }; + +static uv_buf_t stop_msg = { .base = (char *)&stop_magic, + .len = sizeof(stop_magic) }; + +static atomic_uint_fast64_t nsends; + +static atomic_uint_fast64_t ssends; +static atomic_uint_fast64_t sreads; + +static atomic_uint_fast64_t cconnects; +static atomic_uint_fast64_t csends; +static atomic_uint_fast64_t creads; +static atomic_uint_fast64_t ctimeouts; + +static unsigned int workers = 3; + +#define NSENDS 100 +#define NWRITES 10 + +/* + * The UDP protocol doesn't protect against packet duplication, but instead of + * inventing de-duplication, we just ignore the upper bound. 
+ */ + +#define CHECK_RANGE_FULL(v) \ + { \ + int __v = atomic_load(&v); \ + assert_true(NSENDS *NWRITES * 20 / 100 <= __v); \ + /* assert_true(__v <= NSENDS * NWRITES * 110 / 100); */ \ + } + +#define CHECK_RANGE_HALF(v) \ + { \ + int __v = atomic_load(&v); \ + assert_true(NSENDS *NWRITES * 10 / 100 <= __v); \ + /* assert_true(__v <= NSENDS * NWRITES * 60 / 100); */ \ + } + +/* Enable this to print values while running tests */ +#undef PRINT_DEBUG +#ifdef PRINT_DEBUG +#define X(v) fprintf(stderr, #v " = %" PRIu64 "\n", atomic_load(&v)) +#else +#define X(v) +#endif + +/* MOCK */ + +static int +setup_ephemeral_port(isc_sockaddr_t *addr, sa_family_t family) { + isc_result_t result; + socklen_t addrlen = sizeof(*addr); + int fd; + int r; + + isc_sockaddr_fromin6(addr, &in6addr_loopback, 0); + + fd = socket(AF_INET6, family, 0); + if (fd < 0) { + perror("setup_ephemeral_port: socket()"); + return (-1); + } + + r = bind(fd, (const struct sockaddr *)&addr->type.sa, + sizeof(addr->type.sin6)); + if (r != 0) { + perror("setup_ephemeral_port: bind()"); + close(fd); + return (r); + } + + r = getsockname(fd, (struct sockaddr *)&addr->type.sa, &addrlen); + if (r != 0) { + perror("setup_ephemeral_port: getsockname()"); + close(fd); + return (r); + } + + result = isc__nm_socket_reuse(fd); + if (result != ISC_R_SUCCESS && result != ISC_R_NOTIMPLEMENTED) { + fprintf(stderr, + "setup_ephemeral_port: isc__nm_socket_reuse(): %s", + isc_result_totext(result)); + close(fd); + return (-1); + } + + result = isc__nm_socket_reuse_lb(fd); + if (result != ISC_R_SUCCESS && result != ISC_R_NOTIMPLEMENTED) { + fprintf(stderr, + "setup_ephemeral_port: isc__nm_socket_reuse_lb(): %s", + isc_result_totext(result)); + close(fd); + return (-1); + } + +#if IPV6_RECVERR +#define setsockopt_on(socket, level, name) \ + setsockopt(socket, level, name, &(int){ 1 }, sizeof(int)) + + r = setsockopt_on(fd, IPPROTO_IPV6, IPV6_RECVERR); + if (r != 0) { + perror("setup_ephemeral_port"); + close(fd); + return 
(r); + } +#endif + + return (fd); +} + +static int +_setup(void **state) { + UNUSED(state); + + /* workers = isc_os_ncpus(); */ + + if (isc_test_begin(NULL, true, workers) != ISC_R_SUCCESS) { + return (-1); + } + + signal(SIGPIPE, SIG_IGN); + + return (0); +} + +static int +_teardown(void **state) { + UNUSED(state); + + isc_test_end(); + + return (0); +} + +/* Generic */ + +static void +noop_recv_cb(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, + void *cbarg) { + UNUSED(handle); + UNUSED(eresult); + UNUSED(region); + UNUSED(cbarg); +} + +static void +noop_connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + UNUSED(handle); + UNUSED(result); + UNUSED(cbarg); +} + +static int +nm_setup(void **state) { + size_t nworkers = ISC_MAX(ISC_MIN(workers, 32), 1); + int udp_listen_sock = -1; + isc_nm_t **nm = NULL; + + udp_listen_addr = (isc_sockaddr_t){ .length = 0 }; + udp_listen_sock = setup_ephemeral_port(&udp_listen_addr, SOCK_DGRAM); + if (udp_listen_sock < 0) { + return (-1); + } + close(udp_listen_sock); + udp_listen_sock = -1; + + atomic_store(&nsends, NSENDS * NWRITES); + + atomic_store(&csends, 0); + atomic_store(&creads, 0); + atomic_store(&sreads, 0); + atomic_store(&ssends, 0); + atomic_store(&ctimeouts, 0); + atomic_store(&cconnects, 0); + + isc_nonce_buf(&send_magic, sizeof(send_magic)); + isc_nonce_buf(&stop_magic, sizeof(stop_magic)); + if (send_magic == stop_magic) { + return (-1); + } + + nm = isc_mem_get(test_mctx, MAX_NM * sizeof(nm[0])); + for (size_t i = 0; i < MAX_NM; i++) { + nm[i] = isc_nm_start(test_mctx, nworkers); + assert_non_null(nm[i]); + } + + *state = nm; + + return (0); +} + +static int +nm_teardown(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + + for (size_t i = 0; i < MAX_NM; i++) { + isc_nm_destroy(&nm[i]); + assert_null(nm[i]); + } + isc_mem_put(test_mctx, nm, MAX_NM * sizeof(nm[0])); + + return (0); +} + +thread_local size_t nwrites = NWRITES; + +/* UDP */ + +static void 
+udp_listen_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { + assert_non_null(handle); + UNUSED(cbarg); + + if (eresult == ISC_R_SUCCESS) { + atomic_fetch_add(&ssends, 1); + } +} + +static void +udp_listen_recv_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg) { + uint64_t magic = 0; + + assert_null(cbarg); + + if (eresult != ISC_R_SUCCESS) { + return; + } + + assert_int_equal(region->length, sizeof(send_magic)); + atomic_fetch_add(&sreads, 1); + magic = *(uint64_t *)region->base; + + assert_true(magic == stop_magic || magic == send_magic); + isc_nm_send(handle, region, udp_listen_send_cb, NULL); +} + +static void +mock_listenudp_uv_udp_open(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_udp_open, UV_ENOMEM); + + result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, + noop_recv_cb, NULL, 0, &listen_sock); + assert_int_not_equal(result, ISC_R_SUCCESS); + assert_null(listen_sock); + + RESET_RETURN; +} + +static void +mock_listenudp_uv_udp_bind(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_udp_bind, UV_EADDRINUSE); + + result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, + noop_recv_cb, NULL, 0, &listen_sock); + assert_int_not_equal(result, ISC_R_SUCCESS); + assert_null(listen_sock); + + RESET_RETURN; +} + +static void +mock_listenudp_uv_udp_recv_start(void **state) { + isc_nm_t **nm = (isc_nm_t 
**)*state; + isc_nm_t *listen_nm = nm[0]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_udp_recv_start, UV_EADDRINUSE); + + result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, + noop_recv_cb, NULL, 0, &listen_sock); + assert_int_not_equal(result, ISC_R_SUCCESS); + assert_null(listen_sock); + + RESET_RETURN; +} + +static void +mock_udpconnect_uv_udp_open(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_udp_open, UV_ENOMEM); + + result = isc_nm_udpconnect(connect_nm, + (isc_nmiface_t *)&udp_connect_addr, + (isc_nmiface_t *)&udp_listen_addr, + noop_connect_cb, NULL, 1000, 0); + assert_int_not_equal(result, ISC_R_SUCCESS); + + isc_nm_closedown(connect_nm); + + RESET_RETURN; +} + +static void +mock_udpconnect_uv_udp_bind(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_udp_bind, UV_ENOMEM); + + result = isc_nm_udpconnect(connect_nm, + (isc_nmiface_t *)&udp_connect_addr, + (isc_nmiface_t *)&udp_listen_addr, + noop_connect_cb, NULL, 1000, 0); + assert_int_not_equal(result, ISC_R_SUCCESS); + + isc_nm_closedown(connect_nm); + + RESET_RETURN; +} + +#if HAVE_UV_UDP_CONNECT +static void +mock_udpconnect_uv_udp_connect(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_sockaddr_t 
udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_udp_connect, UV_ENOMEM); + + result = isc_nm_udpconnect(connect_nm, + (isc_nmiface_t *)&udp_connect_addr, + (isc_nmiface_t *)&udp_listen_addr, + noop_connect_cb, NULL, 1000, 0); + assert_int_not_equal(result, ISC_R_SUCCESS); + + isc_nm_closedown(connect_nm); + + RESET_RETURN; +} +#endif + +static void +mock_udpconnect_uv_recv_buffer_size(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_recv_buffer_size, UV_ENOMEM); + + result = isc_nm_udpconnect(connect_nm, + (isc_nmiface_t *)&udp_connect_addr, + (isc_nmiface_t *)&udp_listen_addr, + noop_connect_cb, NULL, 1000, 0); + assert_int_equal(result, ISC_R_SUCCESS); /* FIXME: should fail */ + + isc_nm_closedown(connect_nm); + + RESET_RETURN; +} + +static void +mock_udpconnect_uv_send_buffer_size(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + WILL_RETURN(uv_send_buffer_size, UV_ENOMEM); + + result = isc_nm_udpconnect(connect_nm, + (isc_nmiface_t *)&udp_connect_addr, + (isc_nmiface_t *)&udp_listen_addr, + noop_connect_cb, NULL, 1000, 0); + assert_int_equal(result, ISC_R_SUCCESS); /* FIXME: should fail */ + + isc_nm_closedown(connect_nm); + + RESET_RETURN; +} + +static void +udp_noop(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + 
isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, + noop_recv_cb, NULL, 0, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + (void)isc_nm_udpconnect(connect_nm, (isc_nmiface_t *)&udp_connect_addr, + (isc_nmiface_t *)&udp_listen_addr, + noop_connect_cb, NULL, 1000, 0); + + isc_nm_closedown(connect_nm); + + assert_int_equal(0, atomic_load(&cconnects)); + assert_int_equal(0, atomic_load(&csends)); + assert_int_equal(0, atomic_load(&creads)); + assert_int_equal(0, atomic_load(&ctimeouts)); + assert_int_equal(0, atomic_load(&sreads)); + assert_int_equal(0, atomic_load(&ssends)); +} + +static void +udp_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg); +static void +udp_connect_recv_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg); + +static void +udp_connect_send_cb(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) { + assert_non_null(handle); + + UNUSED(eresult); + UNUSED(cbarg); + + atomic_fetch_add(&csends, 1); +} + +static void +udp_connect_send(isc_nmhandle_t *handle, isc_region_t *region) { + uint_fast64_t sends = atomic_load(&nsends); + + while (sends > 0) { + /* Continue until we subtract or we are done */ + if (atomic_compare_exchange_weak(&nsends, &sends, sends - 1)) { + break; + } + } + + isc_nm_send(handle, region, udp_connect_send_cb, NULL); +} + +static void +udp_connect_recv_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg) { + uint64_t magic = 0; + + UNUSED(cbarg); + + assert_non_null(handle); + + if (eresult != ISC_R_SUCCESS) { + goto unref; + } + + assert_int_equal(region->length, sizeof(magic)); + + atomic_fetch_add(&creads, 1); + + magic = *(uint64_t 
*)region->base; + + assert_true(magic == stop_magic || magic == send_magic); + + if (magic == stop_magic) { + goto unref; + } + + if (isc_random_uniform(NWRITES) == 0) { + udp_connect_send(handle, (isc_region_t *)&stop_msg); + } else { + udp_connect_send(handle, (isc_region_t *)&send_msg); + } +unref: + isc_nmhandle_detach(&handle); +} + +static void +udp_connect_connect_cb(isc_nmhandle_t *handle, isc_result_t eresult, + void *cbarg) { + isc_nmhandle_t *readhandle = NULL; + + UNUSED(cbarg); + + if (eresult != ISC_R_SUCCESS) { + uint_fast64_t sends = atomic_load(&nsends); + + /* We failed to connect; try again */ + while (sends > 0) { + /* Continue until we subtract or we are done */ + if (atomic_compare_exchange_weak(&nsends, &sends, + sends - 1)) { + break; + } + } + return; + } + + atomic_fetch_add(&cconnects, 1); + + isc_nmhandle_attach(handle, &readhandle); + isc_nm_read(handle, udp_connect_recv_cb, NULL); + + udp_connect_send(handle, (isc_region_t *)&send_msg); +} + +static isc_threadresult_t +udp_connect_thread(isc_threadarg_t arg) { + isc_nm_t *connect_nm = (isc_nm_t *)arg; + isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + while (atomic_load(&nsends) > 0) { + (void)isc_nm_udpconnect(connect_nm, + (isc_nmiface_t *)&udp_connect_addr, + (isc_nmiface_t *)&udp_listen_addr, + udp_connect_connect_cb, NULL, 1000, 0); + } + return ((isc_threadresult_t)0); +} + +static void +udp_noresponse(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + isc_sockaddr_t udp_connect_addr; + + udp_connect_addr = (isc_sockaddr_t){ .length = 0 }; + isc_sockaddr_fromin6(&udp_connect_addr, &in6addr_loopback, 0); + + result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, + noop_recv_cb, NULL, 0, &listen_sock); + 
assert_int_equal(result, ISC_R_SUCCESS); + + (void)isc_nm_udpconnect(connect_nm, (isc_nmiface_t *)&udp_connect_addr, + (isc_nmiface_t *)&udp_listen_addr, + udp_connect_connect_cb, NULL, 1000, 0); + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + isc_nm_closedown(connect_nm); + + while (atomic_load(&cconnects) != 1) { + isc_thread_yield(); + } + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + assert_int_equal(1, atomic_load(&cconnects)); + assert_true(atomic_load(&csends) <= 1); + assert_int_equal(0, atomic_load(&creads)); + assert_int_equal(0, atomic_load(&ctimeouts)); + assert_int_equal(0, atomic_load(&sreads)); + assert_int_equal(0, atomic_load(&ssends)); +} + +static void +udp_recv_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_thread_t threads[32] = { 0 }; + + result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, + udp_listen_recv_cb, NULL, 0, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(udp_connect_thread, connect_nm, &threads[i]); + } + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + isc_nm_closedown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + assert_true(atomic_load(&cconnects) >= (NSENDS - 1) * NWRITES); + CHECK_RANGE_FULL(csends); + CHECK_RANGE_FULL(creads); + CHECK_RANGE_FULL(sreads); + CHECK_RANGE_FULL(ssends); +} + +static void +udp_recv_half_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = 
nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_thread_t threads[32] = { 0 }; + + result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, + udp_listen_recv_cb, NULL, 0, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(udp_connect_thread, connect_nm, &threads[i]); + } + + while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { + isc_thread_yield(); + } + + isc_nm_closedown(connect_nm); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + assert_true(atomic_load(&cconnects) >= (NSENDS - 1) * NWRITES); + CHECK_RANGE_FULL(csends); + CHECK_RANGE_HALF(creads); + CHECK_RANGE_HALF(sreads); + CHECK_RANGE_HALF(ssends); +} + +static void +udp_half_recv_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_thread_t threads[32] = { 0 }; + + result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, + udp_listen_recv_cb, NULL, 0, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(udp_connect_thread, connect_nm, &threads[i]); + } + + while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { + isc_thread_yield(); + } + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + isc_nm_closedown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); 
+ X(sreads); + X(ssends); + + assert_true(atomic_load(&cconnects) >= (NSENDS - 1) * NWRITES); + CHECK_RANGE_FULL(csends); + CHECK_RANGE_HALF(creads); + CHECK_RANGE_HALF(sreads); + CHECK_RANGE_HALF(ssends); +} + +static void +udp_half_recv_half_send(void **state) { + isc_nm_t **nm = (isc_nm_t **)*state; + isc_nm_t *listen_nm = nm[0]; + isc_nm_t *connect_nm = nm[1]; + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + size_t nthreads = ISC_MAX(ISC_MIN(workers, 32), 1); + isc_thread_t threads[32] = { 0 }; + + result = isc_nm_listenudp(listen_nm, (isc_nmiface_t *)&udp_listen_addr, + udp_listen_recv_cb, NULL, 0, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_create(udp_connect_thread, connect_nm, &threads[i]); + } + + while (atomic_load(&nsends) >= (NSENDS * NWRITES) / 2) { + isc_thread_yield(); + } + + isc_nm_closedown(connect_nm); + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); + assert_null(listen_sock); + + for (size_t i = 0; i < nthreads; i++) { + isc_thread_join(threads[i], NULL); + } + + X(cconnects); + X(csends); + X(creads); + X(ctimeouts); + X(sreads); + X(ssends); + + assert_true(atomic_load(&cconnects) >= (NSENDS - 1) * NWRITES); + CHECK_RANGE_FULL(csends); + CHECK_RANGE_HALF(creads); + CHECK_RANGE_HALF(sreads); + CHECK_RANGE_HALF(ssends); +} + +int +main(void) { + const struct CMUnitTest tests[] = { + cmocka_unit_test_setup_teardown(mock_listenudp_uv_udp_open, + nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown(mock_listenudp_uv_udp_bind, + nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown( + mock_listenudp_uv_udp_recv_start, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(mock_udpconnect_uv_udp_open, + nm_setup, nm_teardown), + cmocka_unit_test_setup_teardown(mock_udpconnect_uv_udp_bind, + nm_setup, nm_teardown), +#if HAVE_UV_UDP_CONNECT + cmocka_unit_test_setup_teardown(mock_udpconnect_uv_udp_connect, + 
nm_setup, nm_teardown), +#endif + cmocka_unit_test_setup_teardown( + mock_udpconnect_uv_recv_buffer_size, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown( + mock_udpconnect_uv_send_buffer_size, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(udp_noop, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(udp_noresponse, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(udp_recv_send, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(udp_recv_half_send, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(udp_half_recv_send, nm_setup, + nm_teardown), + cmocka_unit_test_setup_teardown(udp_half_recv_half_send, + nm_setup, nm_teardown), + }; + + return (cmocka_run_group_tests(tests, _setup, _teardown)); +} + +#else /* HAVE_CMOCKA */ + +#include <stdio.h> + +int +main(void) { + printf("1..0 # Skipped: cmocka not available\n"); + return (0); +} + +#endif /* if HAVE_CMOCKA */ diff --git a/lib/isc/tests/uv_wrap.h b/lib/isc/tests/uv_wrap.h new file mode 100644 index 0000000000..b1b0e3369f --- /dev/null +++ b/lib/isc/tests/uv_wrap.h @@ -0,0 +1,319 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. 
+ */ + +#if HAVE_CMOCKA +#include <inttypes.h> +#include <sched.h> /* IWYU pragma: keep */ +#include <setjmp.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <uv.h> + +#include <isc/atomic.h> + +#define UNIT_TESTING +#include <cmocka.h> + +/* uv_udp_t */ + +int +__wrap_uv_udp_open(uv_udp_t *handle, uv_os_sock_t sock); +int +__wrap_uv_udp_bind(uv_udp_t *handle, const struct sockaddr *addr, + unsigned int flags); +#if HAVE_UV_UDP_CONNECT +int +__wrap_uv_udp_connect(uv_udp_t *handle, const struct sockaddr *addr); +int +__wrap_uv_udp_getpeername(const uv_udp_t *handle, struct sockaddr *name, + int *namelen); +#endif /* HAVE_UV_UDP_CONNECT */ +int +__wrap_uv_udp_getsockname(const uv_udp_t *handle, struct sockaddr *name, + int *namelen); +int +__wrap_uv_udp_send(uv_udp_send_t *req, uv_udp_t *handle, const uv_buf_t bufs[], + unsigned int nbufs, const struct sockaddr *addr, + uv_udp_send_cb send_cb); +int +__wrap_uv_udp_recv_start(uv_udp_t *handle, uv_alloc_cb alloc_cb, + uv_udp_recv_cb recv_cb); +int +__wrap_uv_udp_recv_stop(uv_udp_t *handle); + +/* uv_tcp_t */ +int +__wrap_uv_tcp_open(uv_tcp_t *handle, uv_os_sock_t sock); +int +__wrap_uv_tcp_bind(uv_tcp_t *handle, const struct sockaddr *addr, + unsigned int flags); +int +__wrap_uv_tcp_getsockname(const uv_tcp_t *handle, struct sockaddr *name, + int *namelen); +int +__wrap_uv_tcp_getpeername(const uv_tcp_t *handle, struct sockaddr *name, + int *namelen); +int +__wrap_uv_tcp_connect(uv_connect_t *req, uv_tcp_t *handle, + const struct sockaddr *addr, uv_connect_cb cb); + +/* uv_stream_t */ +int +__wrap_uv_listen(uv_stream_t *stream, int backlog, uv_connection_cb cb); +int +__wrap_uv_accept(uv_stream_t *server, uv_stream_t *client); + +/* uv_handle_t */ +int +__wrap_uv_send_buffer_size(uv_handle_t *handle, int *value); +int +__wrap_uv_recv_buffer_size(uv_handle_t *handle, int *value); +int +__wrap_uv_fileno(const uv_handle_t *handle, uv_os_fd_t *fd); + +/* uv_timer_t */ +/* FIXME */ +/* + * uv_timer_init + * uv_timer_start + */ + +static atomic_int 
__state_uv_udp_open = ATOMIC_VAR_INIT(0); + +int +__wrap_uv_udp_open(uv_udp_t *handle, uv_os_sock_t sock) { + if (atomic_load(&__state_uv_udp_open) == 0) { + return (uv_udp_open(handle, sock)); + } + return (atomic_load(&__state_uv_udp_open)); +} + +static atomic_int __state_uv_udp_bind = ATOMIC_VAR_INIT(0); + +int +__wrap_uv_udp_bind(uv_udp_t *handle, const struct sockaddr *addr, + unsigned int flags) { + if (atomic_load(&__state_uv_udp_bind) == 0) { + return (uv_udp_bind(handle, addr, flags)); + } + return (atomic_load(&__state_uv_udp_bind)); +} + +static atomic_int __state_uv_udp_connect = ATOMIC_VAR_INIT(0); +#if HAVE_UV_UDP_CONNECT +int +__wrap_uv_udp_connect(uv_udp_t *handle, const struct sockaddr *addr) { + if (atomic_load(&__state_uv_udp_connect) == 0) { + return (uv_udp_connect(handle, addr)); + } + return (atomic_load(&__state_uv_udp_connect)); +} +#endif /* HAVE_UV_UDP_CONNECT */ + +static atomic_int __state_uv_udp_getpeername = ATOMIC_VAR_INIT(0); +#if HAVE_UV_UDP_CONNECT +int +__wrap_uv_udp_getpeername(const uv_udp_t *handle, struct sockaddr *name, + int *namelen) { + if (atomic_load(&__state_uv_udp_getpeername) == 0) { + return (uv_udp_getpeername(handle, name, namelen)); + } + return (atomic_load(&__state_uv_udp_getpeername)); +} +#endif /* HAVE_UV_UDP_CONNECT */ + +static atomic_int __state_uv_udp_getsockname = ATOMIC_VAR_INIT(0); +int +__wrap_uv_udp_getsockname(const uv_udp_t *handle, struct sockaddr *name, + int *namelen) { + if (atomic_load(&__state_uv_udp_getsockname) == 0) { + return (uv_udp_getsockname(handle, name, namelen)); + } + return (atomic_load(&__state_uv_udp_getsockname)); +} + +static atomic_int __state_uv_udp_send = ATOMIC_VAR_INIT(0); +int +__wrap_uv_udp_send(uv_udp_send_t *req, uv_udp_t *handle, const uv_buf_t bufs[], + unsigned int nbufs, const struct sockaddr *addr, + uv_udp_send_cb send_cb) { + if (atomic_load(&__state_uv_udp_send) == 0) { + return (uv_udp_send(req, handle, bufs, nbufs, addr, send_cb)); + } + return 
(atomic_load(&__state_uv_udp_send)); +} + +static atomic_int __state_uv_udp_recv_start = ATOMIC_VAR_INIT(0); +int +__wrap_uv_udp_recv_start(uv_udp_t *handle, uv_alloc_cb alloc_cb, + uv_udp_recv_cb recv_cb) { + if (atomic_load(&__state_uv_udp_recv_start) == 0) { + return (uv_udp_recv_start(handle, alloc_cb, recv_cb)); + } + return (atomic_load(&__state_uv_udp_recv_start)); +} + +static atomic_int __state_uv_udp_recv_stop = ATOMIC_VAR_INIT(0); +int +__wrap_uv_udp_recv_stop(uv_udp_t *handle) { + if (atomic_load(&__state_uv_udp_recv_stop) == 0) { + return (uv_udp_recv_stop(handle)); + } + return (atomic_load(&__state_uv_udp_recv_stop)); +} + +static atomic_int __state_uv_tcp_open = ATOMIC_VAR_INIT(0); +int +__wrap_uv_tcp_open(uv_tcp_t *handle, uv_os_sock_t sock) { + if (atomic_load(&__state_uv_tcp_open) == 0) { + return (uv_tcp_open(handle, sock)); + } + return (atomic_load(&__state_uv_tcp_open)); +} + +static atomic_int __state_uv_tcp_bind = ATOMIC_VAR_INIT(0); +int +__wrap_uv_tcp_bind(uv_tcp_t *handle, const struct sockaddr *addr, + unsigned int flags) { + if (atomic_load(&__state_uv_tcp_bind) == 0) { + return (uv_tcp_bind(handle, addr, flags)); + } + return (atomic_load(&__state_uv_tcp_bind)); +} + +static atomic_int __state_uv_tcp_getsockname = ATOMIC_VAR_INIT(0); +int +__wrap_uv_tcp_getsockname(const uv_tcp_t *handle, struct sockaddr *name, + int *namelen) { + if (atomic_load(&__state_uv_tcp_getsockname) == 0) { + return (uv_tcp_getsockname(handle, name, namelen)); + } + return (atomic_load(&__state_uv_tcp_getsockname)); +} + +static atomic_int __state_uv_tcp_getpeername = ATOMIC_VAR_INIT(0); +int +__wrap_uv_tcp_getpeername(const uv_tcp_t *handle, struct sockaddr *name, + int *namelen) { + if (atomic_load(&__state_uv_tcp_getpeername) == 0) { + return (uv_tcp_getpeername(handle, name, namelen)); + } + return (atomic_load(&__state_uv_tcp_getpeername)); +} + +static atomic_int __state_uv_tcp_connect = ATOMIC_VAR_INIT(0); +int +__wrap_uv_tcp_connect(uv_connect_t *req, 
uv_tcp_t *handle, + const struct sockaddr *addr, uv_connect_cb cb) { + if (atomic_load(&__state_uv_tcp_connect) == 0) { + return (uv_tcp_connect(req, handle, addr, cb)); + } + return (atomic_load(&__state_uv_tcp_connect)); +} + +static atomic_int __state_uv_listen = ATOMIC_VAR_INIT(0); +int +__wrap_uv_listen(uv_stream_t *stream, int backlog, uv_connection_cb cb) { + if (atomic_load(&__state_uv_listen) == 0) { + return (uv_listen(stream, backlog, cb)); + } + return (atomic_load(&__state_uv_listen)); +} + +static atomic_int __state_uv_accept = ATOMIC_VAR_INIT(0); +int +__wrap_uv_accept(uv_stream_t *server, uv_stream_t *client) { + if (atomic_load(&__state_uv_accept) == 0) { + return (uv_accept(server, client)); + } + return (atomic_load(&__state_uv_accept)); +} + +static atomic_int __state_uv_send_buffer_size = ATOMIC_VAR_INIT(0); +int +__wrap_uv_send_buffer_size(uv_handle_t *handle, int *value) { + if (atomic_load(&__state_uv_send_buffer_size) == 0) { + return (uv_send_buffer_size(handle, value)); + } + return (atomic_load(&__state_uv_send_buffer_size)); +} + +static atomic_int __state_uv_recv_buffer_size = ATOMIC_VAR_INIT(0); +int +__wrap_uv_recv_buffer_size(uv_handle_t *handle, int *value) { + if (atomic_load(&__state_uv_recv_buffer_size) == 0) { + return (uv_recv_buffer_size(handle, value)); + } + return (atomic_load(&__state_uv_recv_buffer_size)); +} + +static atomic_int __state_uv_fileno = ATOMIC_VAR_INIT(0); +int +__wrap_uv_fileno(const uv_handle_t *handle, uv_os_fd_t *fd) { + if (atomic_load(&__state_uv_fileno) == 0) { + return (uv_fileno(handle, fd)); + } + return (atomic_load(&__state_uv_fileno)); +} + +#define uv_udp_open(...) __wrap_uv_udp_open(__VA_ARGS__) +#define uv_udp_bind(...) __wrap_uv_udp_bind(__VA_ARGS__) +#if HAVE_UV_UDP_CONNECT +#define uv_udp_connect(...) __wrap_uv_udp_connect(__VA_ARGS__) +#define uv_udp_getpeername(...) __wrap_uv_udp_getpeername(__VA_ARGS__) +#endif /* HAVE_UV_UDP_CONNECT */ +#define uv_udp_getsockname(...) 
__wrap_uv_udp_getsockname(__VA_ARGS__) +#define uv_udp_send(...) __wrap_uv_udp_send(__VA_ARGS__) +#define uv_udp_recv_start(...) __wrap_uv_udp_recv_start(__VA_ARGS__) +#define uv_udp_recv_stop(...) __wrap_uv_udp_recv_stop(__VA_ARGS__) + +#define uv_tcp_open(...) __wrap_uv_tcp_open(__VA_ARGS__) +#define uv_tcp_bind(...) __wrap_uv_tcp_bind(__VA_ARGS__) +#define uv_tcp_getsockname(...) __wrap_uv_tcp_getsockname(__VA_ARGS__) +#define uv_tcp_getpeername(...) __wrap_uv_tcp_getpeername(__VA_ARGS__) +#define uv_tcp_connect(...) __wrap_uv_tcp_connect(__VA_ARGS__) + +#define uv_listen(...) __wrap_uv_listen(__VA_ARGS__) +#define uv_accept(...) __wrap_uv_accept(__VA_ARGS__) + +#define uv_send_buffer_size(...) __wrap_uv_send_buffer_size(__VA_ARGS__) +#define uv_recv_buffer_size(...) __wrap_uv_recv_buffer_size(__VA_ARGS__) +#define uv_fileno(...) __wrap_uv_fileno(__VA_ARGS__) + +#define RESET_RETURN \ + { \ + atomic_store(&__state_uv_udp_open, 0); \ + atomic_store(&__state_uv_udp_bind, 0); \ + atomic_store(&__state_uv_udp_connect, 0); \ + atomic_store(&__state_uv_udp_getpeername, 0); \ + atomic_store(&__state_uv_udp_getsockname, 0); \ + atomic_store(&__state_uv_udp_send, 0); \ + atomic_store(&__state_uv_udp_recv_start, 0); \ + atomic_store(&__state_uv_udp_recv_stop, 0); \ + atomic_store(&__state_uv_tcp_open, 0); \ + atomic_store(&__state_uv_tcp_bind, 0); \ + atomic_store(&__state_uv_tcp_getpeername, 0); \ + atomic_store(&__state_uv_tcp_getsockname, 0); \ + atomic_store(&__state_uv_tcp_connect, 0); \ + atomic_store(&__state_uv_listen, 0); \ + atomic_store(&__state_uv_accept, 0); \ + atomic_store(&__state_uv_send_buffer_size, 0); \ + atomic_store(&__state_uv_recv_buffer_size, 0); \ + atomic_store(&__state_uv_fileno, 0); \ + } + +#define WILL_RETURN(func, value) atomic_store(&__state_##func, value) + +#endif /* HAVE_CMOCKA */ diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in index 69ebcb56e6..498f258783 100644 --- a/lib/isc/win32/libisc.def.in +++ 
b/lib/isc/win32/libisc.def.in @@ -434,8 +434,8 @@ isc_netaddr_setzone isc_netaddr_totext isc_netaddr_unspec isc_netscope_pton -isc_nmhandle_attach -isc_nmhandle_detach +isc__nmhandle_attach +isc__nmhandle_detach isc_nmhandle_getdata isc_nmhandle_getextra isc_nmhandle_is_stream @@ -463,8 +463,8 @@ isc_nm_start isc_nm_stoplistening isc_nm_tcpconnect isc_nm_tcpdnsconnect -isc_nm_tcp_gettimeouts -isc_nm_tcp_settimeouts +isc_nm_gettimeouts +isc_nm_settimeouts isc_nm_tcpdns_keepalive isc_nm_tcpdns_sequential isc_nm_tid diff --git a/lib/isc/win32/libisc.vcxproj.in b/lib/isc/win32/libisc.vcxproj.in index 10952b2293..98ce0c9d38 100644 --- a/lib/isc/win32/libisc.vcxproj.in +++ b/lib/isc/win32/libisc.vcxproj.in @@ -414,6 +414,7 @@ copy InstallFiles ..\Build\Release\ + diff --git a/lib/ns/client.c b/lib/ns/client.c index 9ac75ff25d..0889688d49 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -1030,8 +1030,8 @@ no_nsid: INSIST(count < DNS_EDNSOPTIONS); - isc_nm_tcp_gettimeouts(isc_nmhandle_netmgr(client->handle), - NULL, NULL, NULL, &adv); + isc_nm_gettimeouts(isc_nmhandle_netmgr(client->handle), NULL, + NULL, NULL, &adv); isc_buffer_init(&buf, advtimo, sizeof(advtimo)); isc_buffer_putuint16(&buf, (uint16_t)adv); ednsopts[count].code = DNS_OPT_TCP_KEEPALIVE; @@ -1644,7 +1644,9 @@ ns__client_request(isc_nmhandle_t *handle, isc_result_t eresult, #endif /* ifdef HAVE_DNSTAP */ ifp = (ns_interface_t *)arg; - UNUSED(eresult); + if (eresult != ISC_R_SUCCESS) { + return; + } mgr = ifp->clientmgr; if (mgr == NULL) { @@ -2210,7 +2212,9 @@ ns__client_tcpconn(isc_nmhandle_t *handle, isc_result_t result, void *arg) { isc_netaddr_t netaddr; int match; - UNUSED(result); + if (result != ISC_R_SUCCESS) { + return (result); + } if (handle != NULL) { peeraddr = isc_nmhandle_peeraddr(handle); diff --git a/util/copyrights b/util/copyrights index 47081e0825..71420be276 100644 --- a/util/copyrights +++ b/util/copyrights @@ -1897,6 +1897,7 @@ ./lib/isc/netmgr/tcp.c C 2019,2020 
./lib/isc/netmgr/tcpdns.c C 2019,2020 ./lib/isc/netmgr/tls.c C 2020 +./lib/isc/netmgr/tlsdns.c C 2020 ./lib/isc/netmgr/udp.c C 2019,2020 ./lib/isc/netmgr/uv-compat.c C 2020 ./lib/isc/netmgr/uv-compat.h C 2019,2020 @@ -1952,7 +1953,6 @@ ./lib/isc/tests/md_test.c C 2018,2019,2020 ./lib/isc/tests/mem_test.c C 2015,2016,2017,2018,2019,2020 ./lib/isc/tests/netaddr_test.c C 2016,2018,2019,2020 -./lib/isc/tests/netmgr_test.c C 2020 ./lib/isc/tests/parse_test.c C 2012,2013,2016,2018,2019,2020 ./lib/isc/tests/pool_test.c C 2013,2016,2018,2019,2020 ./lib/isc/tests/quota_test.c C 2020 @@ -1967,9 +1967,14 @@ ./lib/isc/tests/symtab_test.c C 2011,2012,2013,2016,2018,2019,2020 ./lib/isc/tests/task_test.c C 2011,2012,2016,2017,2018,2019,2020 ./lib/isc/tests/taskpool_test.c C 2011,2012,2016,2018,2019,2020 +./lib/isc/tests/tcp_quota_test.c C 2020 +./lib/isc/tests/tcp_test.c C 2020 +./lib/isc/tests/tcpdns_test.c C 2020 ./lib/isc/tests/testdata/file/keep X 2014,2018,2019,2020 ./lib/isc/tests/time_test.c C 2014,2015,2016,2018,2019,2020 ./lib/isc/tests/timer_test.c C 2018,2019,2020 +./lib/isc/tests/udp_test.c C 2020 +./lib/isc/tests/uv_wrap.h C 2020 ./lib/isc/timer.c C 1998,1999,2000,2001,2002,2004,2005,2007,2008,2009,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020 ./lib/isc/tm.c C 2014,2016,2018,2019,2020 ./lib/isc/unix/dir.c C 1999,2000,2001,2004,2005,2007,2008,2009,2011,2012,2016,2017,2018,2019,2020 diff --git a/util/suppressions.txt b/util/suppressions.txt index ea5ff3b292..27aa9b6e3a 100644 --- a/util/suppressions.txt +++ b/util/suppressions.txt @@ -1,3 +1,4 @@ unmatchedSuppression:* preprocessorErrorDirective:* unknownMacro:* +nullPointerRedundantCheck:*