diff --git a/CHANGES b/CHANGES index e8cd99b5ce..1a5abf7d2d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,12 @@ +5744. [func] The network manager is now used for netlink sockets + to monitor network interface changes. This was the + last remaining use of the old isc_socket and + isc_socketmgr APIs, so they have now been removed. + The "named -S" argument and the "reserved-sockets" + option in named.conf have no function now, and are + deprecated. "socketmgr" statistics are no longer + reported in the statistics channel. [GL #2926] + 5743. [func] Add finer-grained "update-policy" rules, "krb5-subdomain-self-rhs" and "ms-subdomain-self-rhs", which restrict SRV and PTR record changes, allowing diff --git a/bin/check/named-checkzone.c b/bin/check/named-checkzone.c index ec7a69db89..86c4fe00f7 100644 --- a/bin/check/named-checkzone.c +++ b/bin/check/named-checkzone.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/bin/delv/delv.c b/bin/delv/delv.c index 64ab63ba24..9adb5cd26c 100644 --- a/bin/delv/delv.c +++ b/bin/delv/delv.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -1742,7 +1741,7 @@ main(int argc, char *argv[]) { CHECK(isc_appctx_create(mctx, &actx)); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, &timermgr, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, &timermgr); parse_args(argc, argv); @@ -1844,7 +1843,7 @@ cleanup: dns_client_detach(&client); } - isc_managers_destroy(&netmgr, &taskmgr, &timermgr, NULL); + isc_managers_destroy(&netmgr, &taskmgr, &timermgr); if (actx != NULL) { isc_appctx_destroy(&actx); diff --git a/bin/dig/dig.c b/bin/dig/dig.c index 2de1728a61..ab34d9741a 100644 --- a/bin/dig/dig.c +++ b/bin/dig/dig.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/bin/dig/dighost.c b/bin/dig/dighost.c index fc45fab332..62fc949b39 100644 --- a/bin/dig/dighost.c +++ b/bin/dig/dighost.c @@ -1356,7 +1356,7 @@ 
setup_libs(void) { isc_log_setdebuglevel(lctx, 0); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); result = isc_task_create(taskmgr, 0, &global_task); check_result(result, "isc_task_create"); @@ -4248,7 +4248,7 @@ destroy_libs(void) { isc_task_detach(&global_task); } - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); LOCK_LOOKUP; isc_refcount_destroy(&recvcount); diff --git a/bin/dig/dighost.h b/bin/dig/dighost.h index b808b92c70..80b3d106cf 100644 --- a/bin/dig/dighost.h +++ b/bin/dig/dighost.h @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include diff --git a/bin/dnssec/dnssec-signzone.c b/bin/dnssec/dnssec-signzone.c index bca4a36456..2e0d5e893c 100644 --- a/bin/dnssec/dnssec-signzone.c +++ b/bin/dnssec/dnssec-signzone.c @@ -3990,7 +3990,7 @@ main(int argc, char *argv[]) { print_time(outfp); print_version(outfp); - isc_managers_create(mctx, ntasks, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, ntasks, 0, &netmgr, &taskmgr, NULL); main_task = NULL; result = isc_task_create(taskmgr, 0, &main_task); @@ -4041,7 +4041,7 @@ main(int argc, char *argv[]) { for (i = 0; i < (int)ntasks; i++) { isc_task_detach(&tasks[i]); } - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); isc_mem_put(mctx, tasks, ntasks * sizeof(isc_task_t *)); postsign(); TIME_NOW(&sign_finish); diff --git a/bin/named/bind9.xsl b/bin/named/bind9.xsl index 5078115f27..59bca67470 100644 --- a/bin/named/bind9.xsl +++ b/bin/named/bind9.xsl @@ -11,7 +11,7 @@ --> - + @@ -928,55 +928,6 @@ - -

Network Status

- - - - - - - - - - - - - - - even - odd - - - - - - - - - - - - -
IDNameTypeReferencesLocalAddressPeerAddressState
- - - - - - - - - - - - - - - -
-
-

Task Manager Configuration

diff --git a/bin/named/include/named/globals.h b/bin/named/include/named/globals.h index b7798265db..83f5e4839f 100644 --- a/bin/named/include/named/globals.h +++ b/bin/named/include/named/globals.h @@ -59,22 +59,21 @@ EXTERN bool named_g_run_done INIT(false); * for really short timers, another for client timers, and one * for zone timers. */ -EXTERN isc_timermgr_t *named_g_timermgr INIT(NULL); -EXTERN isc_socketmgr_t *named_g_socketmgr INIT(NULL); -EXTERN isc_nm_t *named_g_netmgr INIT(NULL); -EXTERN cfg_parser_t *named_g_parser INIT(NULL); -EXTERN cfg_parser_t *named_g_addparser INIT(NULL); -EXTERN const char *named_g_version INIT(PACKAGE_VERSION); -EXTERN const char *named_g_product INIT(PACKAGE_NAME); -EXTERN const char *named_g_description INIT(PACKAGE_DESCRIPTION); -EXTERN const char *named_g_srcid INIT(PACKAGE_SRCID); -EXTERN const char *named_g_configargs INIT(PACKAGE_CONFIGARGS); -EXTERN const char *named_g_builder INIT(PACKAGE_BUILDER); -EXTERN in_port_t named_g_port INIT(0); -EXTERN in_port_t named_g_tlsport INIT(0); -EXTERN in_port_t named_g_httpsport INIT(0); -EXTERN in_port_t named_g_httpport INIT(0); -EXTERN isc_dscp_t named_g_dscp INIT(-1); +EXTERN isc_timermgr_t *named_g_timermgr INIT(NULL); +EXTERN isc_nm_t *named_g_netmgr INIT(NULL); +EXTERN cfg_parser_t *named_g_parser INIT(NULL); +EXTERN cfg_parser_t *named_g_addparser INIT(NULL); +EXTERN const char *named_g_version INIT(PACKAGE_VERSION); +EXTERN const char *named_g_product INIT(PACKAGE_NAME); +EXTERN const char *named_g_description INIT(PACKAGE_DESCRIPTION); +EXTERN const char *named_g_srcid INIT(PACKAGE_SRCID); +EXTERN const char *named_g_configargs INIT(PACKAGE_CONFIGARGS); +EXTERN const char *named_g_builder INIT(PACKAGE_BUILDER); +EXTERN in_port_t named_g_port INIT(0); +EXTERN in_port_t named_g_tlsport INIT(0); +EXTERN in_port_t named_g_httpsport INIT(0); +EXTERN in_port_t named_g_httpport INIT(0); +EXTERN isc_dscp_t named_g_dscp INIT(-1); EXTERN in_port_t named_g_http_listener_clients 
INIT(0); EXTERN in_port_t named_g_http_streams_per_conn INIT(0); diff --git a/bin/named/main.c b/bin/named/main.c index f0bf372def..f2f200a3f4 100644 --- a/bin/named/main.c +++ b/bin/named/main.c @@ -120,7 +120,6 @@ static char absolute_conffile[PATH_MAX]; static char saved_command_line[4096] = { 0 }; static char ellipsis[5] = { 0 }; static char version[512]; -static unsigned int maxsocks = 0; static int maxudp = 0; /* @@ -824,8 +823,7 @@ parse_command_line(int argc, char *argv[]) { want_stats = true; break; case 'S': - maxsocks = parse_int(isc_commandline_argument, - "max number of sockets"); + /* Formerly maxsocks */ break; case 't': /* XXXJAB should we make a copy? */ @@ -897,7 +895,6 @@ parse_command_line(int argc, char *argv[]) { static isc_result_t create_managers(void) { isc_result_t result; - unsigned int socks; INSIST(named_g_cpus_detected > 0); @@ -921,30 +918,21 @@ create_managers(void) { named_g_udpdisp == 1 ? "" : "s"); result = isc_managers_create(named_g_mctx, named_g_cpus, - 0 /* quantum */, maxsocks, &named_g_netmgr, - &named_g_taskmgr, &named_g_timermgr, - &named_g_socketmgr); + 0 /* quantum */, &named_g_netmgr, + &named_g_taskmgr, &named_g_timermgr); if (result != ISC_R_SUCCESS) { return (result); } - isc_socketmgr_maxudp(named_g_socketmgr, maxudp); isc_nm_maxudp(named_g_netmgr, maxudp); - result = isc_socketmgr_getmaxsockets(named_g_socketmgr, &socks); - if (result == ISC_R_SUCCESS) { - isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, - NAMED_LOGMODULE_SERVER, ISC_LOG_INFO, - "using up to %u sockets", socks); - } - return (ISC_R_SUCCESS); } static void destroy_managers(void) { isc_managers_destroy(&named_g_netmgr, &named_g_taskmgr, - &named_g_timermgr, &named_g_socketmgr); + &named_g_timermgr); } static void diff --git a/bin/named/named.conf.rst b/bin/named/named.conf.rst index 3793cfe123..e04891ab2a 100644 --- a/bin/named/named.conf.rst +++ b/bin/named/named.conf.rst @@ -391,7 +391,7 @@ OPTIONS request-ixfr boolean; request-nsid boolean; 
require-server-cookie boolean; - reserved-sockets integer; + reserved-sockets integer;// deprecated resolver-nonbackoff-tries integer; resolver-query-timeout integer; resolver-retry-interval integer; diff --git a/bin/named/named.rst b/bin/named/named.rst index cd7becc840..4fb58f1a76 100644 --- a/bin/named/named.rst +++ b/bin/named/named.rst @@ -29,7 +29,7 @@ named - Internet domain name server Synopsis ~~~~~~~~ -:program:`named` [ [**-4**] | [**-6**] ] [**-c** config-file] [**-d** debug-level] [**-D** string] [**-E** engine-name] [**-f**] [**-g**] [**-L** logfile] [**-M** option] [**-m** flag] [**-n** #cpus] [**-p** port] [**-s**] [**-S** #max-socks] [**-t** directory] [**-U** #listeners] [**-u** user] [**-v**] [**-V**] [**-X** lock-file] +:program:`named` [ [**-4**] | [**-6**] ] [**-c** config-file] [**-d** debug-level] [**-D** string] [**-E** engine-name] [**-f**] [**-g**] [**-L** logfile] [**-M** option] [**-m** flag] [**-n** #cpus] [**-p** port] [**-s**] [**-t** directory] [**-U** #listeners] [**-u** user] [**-v**] [**-V**] [**-X** lock-file] Description ~~~~~~~~~~~ @@ -126,9 +126,7 @@ Options removed or changed in a future release. ``-S #max-socks`` - This option allows ``named`` to use up to ``#max-socks`` sockets. The default value is - 21000 on systems built with default configuration options, and 4096 - on systems built with ``configure --with-tuning=small``. + This option is deprecated and no longer has any function. .. 
warning:: diff --git a/bin/named/server.c b/bin/named/server.c index c8e7232da4..f50f4db850 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -8348,11 +8347,9 @@ load_configuration(const char *filename, named_server_t *server, isc_logconfig_t *logc = NULL; isc_portset_t *v4portset = NULL; isc_portset_t *v6portset = NULL; - isc_resourcevalue_t nfiles; isc_result_t result, tresult; uint32_t heartbeat_interval; uint32_t interface_interval; - uint32_t reserved; uint32_t udpsize; uint32_t transfer_message_size; uint32_t recv_tcp_buffer_size; @@ -8363,7 +8360,6 @@ load_configuration(const char *filename, named_server_t *server, named_cachelist_t cachelist, tmpcachelist; ns_altsecret_t *altsecret; ns_altsecretlist_t altsecrets, tmpaltsecrets; - unsigned int maxsocks; uint32_t softquota = 0; uint32_t max; uint64_t initial, idle, keepalive, advertised; @@ -8516,52 +8512,6 @@ load_configuration(const char *filename, named_server_t *server, */ CHECK(check_lockfile(server, config, first_time)); - /* - * Check if max number of open sockets that the system allows is - * sufficiently large. Failing this condition is not necessarily fatal, - * but may cause subsequent runtime failures for a busy recursive - * server. - */ - result = isc_socketmgr_getmaxsockets(named_g_socketmgr, &maxsocks); - if (result != ISC_R_SUCCESS) { - maxsocks = 0; - } - result = isc_resource_getcurlimit(isc_resource_openfiles, &nfiles); - if (result == ISC_R_SUCCESS && (isc_resourcevalue_t)maxsocks > nfiles) { - isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, - NAMED_LOGMODULE_SERVER, ISC_LOG_WARNING, - "max open files (%" PRIu64 ")" - " is smaller than max sockets (%u)", - nfiles, maxsocks); - } - - /* - * Set the number of socket reserved for TCP, stdio etc. 
- */ - obj = NULL; - result = named_config_get(maps, "reserved-sockets", &obj); - INSIST(result == ISC_R_SUCCESS); - reserved = cfg_obj_asuint32(obj); - if (maxsocks != 0) { - if (maxsocks < 128U) { /* Prevent underflow. */ - reserved = 0; - } else if (reserved > maxsocks - 128U) { /* Minimum UDP space. - */ - reserved = maxsocks - 128; - } - } - /* Minimum TCP/stdio space. */ - if (reserved < 128U) { - reserved = 128; - } - if (reserved + 128U > maxsocks && maxsocks != 0) { - isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, - NAMED_LOGMODULE_SERVER, ISC_LOG_WARNING, - "less than 128 UDP sockets available after " - "applying 'reserved-sockets' and 'maxsockets'"); - } - isc_socketmgr_setreserved(named_g_socketmgr, reserved); - #if defined(HAVE_GEOIP2) /* * Release any previously opened GeoIP2 databases. @@ -9871,11 +9821,11 @@ run_server(isc_task_t *task, isc_event_t *event) { geoip = NULL; #endif /* if defined(HAVE_GEOIP2) */ - CHECKFATAL(ns_interfacemgr_create( - named_g_mctx, server->sctx, named_g_taskmgr, - named_g_timermgr, named_g_socketmgr, named_g_netmgr, - named_g_dispatchmgr, server->task, geoip, - named_g_cpus, &server->interfacemgr), + CHECKFATAL(ns_interfacemgr_create(named_g_mctx, server->sctx, + named_g_taskmgr, named_g_timermgr, + named_g_netmgr, named_g_dispatchmgr, + server->task, geoip, named_g_cpus, + true, &server->interfacemgr), "creating interface manager"); CHECKFATAL(isc_timer_create(named_g_timermgr, isc_timertype_inactive, @@ -10182,7 +10132,6 @@ named_server_create(isc_mem_t *mctx, named_server_t **serverp) { CHECKFATAL(isc_stats_create(server->mctx, &server->sockstats, isc_sockstatscounter_max), "isc_stats_create"); - isc_socketmgr_setstats(named_g_socketmgr, server->sockstats); isc_nm_setstats(named_g_netmgr, server->sockstats); CHECKFATAL(isc_stats_create(named_g_mctx, &server->zonestats, diff --git a/bin/named/statschannel.c b/bin/named/statschannel.c index c9685d8e19..dcd1a3e231 100644 --- a/bin/named/statschannel.c +++ 
b/bin/named/statschannel.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -2313,13 +2312,6 @@ generatexml(named_server_t *server, uint32_t flags, int *buflen, } TRY0(xmlTextWriterEndElement(writer)); /* /views */ - if ((flags & STATS_XML_NET) != 0) { - TRY0(xmlTextWriterStartElement(writer, - ISC_XMLCHAR "socketmgr")); - TRY0(isc_socketmgr_renderxml(named_g_socketmgr, writer)); - TRY0(xmlTextWriterEndElement(writer)); /* /socketmgr */ - } - if ((flags & STATS_XML_TASKS) != 0) { TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "taskmgr")); TRY0(isc_taskmgr_renderxml(named_g_taskmgr, writer)); @@ -3103,7 +3095,6 @@ generatejson(named_server_t *server, size_t *msglen, const char **msg, if ((flags & STATS_JSON_NET) != 0) { /* socket stat counters */ - json_object *sockets; counters = json_object_new_object(); dumparg.result = ISC_R_SUCCESS; @@ -3124,17 +3115,6 @@ generatejson(named_server_t *server, size_t *msglen, const char **msg, } else { json_object_put(counters); } - - sockets = json_object_new_object(); - CHECKMEM(sockets); - - result = isc_socketmgr_renderjson(named_g_socketmgr, sockets); - if (result != ISC_R_SUCCESS) { - json_object_put(sockets); - goto cleanup; - } - - json_object_object_add(bindstats, "socketmgr", sockets); } if ((flags & STATS_JSON_TASKS) != 0) { diff --git a/bin/nsupdate/nsupdate.c b/bin/nsupdate/nsupdate.c index ab2f1b07f4..5202c5fab2 100644 --- a/bin/nsupdate/nsupdate.c +++ b/bin/nsupdate/nsupdate.c @@ -913,8 +913,7 @@ setup_system(void) { irs_resconf_destroy(&resconf); - result = isc_managers_create(gmctx, 1, 0, 0, &netmgr, &taskmgr, NULL, - NULL); + result = isc_managers_create(gmctx, 1, 0, &netmgr, &taskmgr, NULL); check_result(result, "isc_managers_create"); result = dns_dispatchmgr_create(gmctx, netmgr, &dispatchmgr); @@ -3320,7 +3319,7 @@ cleanup(void) { } ddebug("Shutting down managers"); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); 
ddebug("Destroying event"); isc_event_free(&global_event); diff --git a/bin/rndc/rndc.c b/bin/rndc/rndc.c index f22c9fedf0..ba5ad175f0 100644 --- a/bin/rndc/rndc.c +++ b/bin/rndc/rndc.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -1029,7 +1028,7 @@ main(int argc, char **argv) { serial = isc_random32(); isc_mem_create(&rndc_mctx); - isc_managers_create(rndc_mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(rndc_mctx, 1, 0, &netmgr, &taskmgr, NULL); DO("create task", isc_task_create(taskmgr, 0, &rndc_task)); isc_log_create(rndc_mctx, &log, &logconfig); isc_log_setcontext(log); @@ -1084,7 +1083,7 @@ main(int argc, char **argv) { } isc_task_detach(&rndc_task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); /* * Note: when TCP connections are shut down, there will be a final diff --git a/bin/tests/system/pipelined/pipequeries.c b/bin/tests/system/pipelined/pipequeries.c index 3ee5fade07..78965bf975 100644 --- a/bin/tests/system/pipelined/pipequeries.c +++ b/bin/tests/system/pipelined/pipequeries.c @@ -262,7 +262,7 @@ main(int argc, char *argv[]) { RUNCHECK(dst_lib_init(mctx, NULL)); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); RUNCHECK(isc_task_create(taskmgr, 0, &task)); RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); @@ -287,7 +287,7 @@ main(int argc, char *argv[]) { isc_task_shutdown(task); isc_task_detach(&task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); dst_lib_destroy(); diff --git a/bin/tests/system/resolve.c b/bin/tests/system/resolve.c index 1735f1cc6e..2cc9668368 100644 --- a/bin/tests/system/resolve.c +++ b/bin/tests/system/resolve.c @@ -61,7 +61,7 @@ isc_timermgr_t *ctxs_timermgr = NULL; static void ctxs_destroy(void) { - isc_managers_destroy(&ctxs_netmgr, &ctxs_taskmgr, &ctxs_timermgr, 
NULL); + isc_managers_destroy(&ctxs_netmgr, &ctxs_taskmgr, &ctxs_timermgr); if (ctxs_actx != NULL) { isc_appctx_destroy(&ctxs_actx); @@ -83,8 +83,8 @@ ctxs_init(void) { goto fail; } - isc_managers_create(ctxs_mctx, 1, 0, 0, &ctxs_netmgr, &ctxs_taskmgr, - &ctxs_timermgr, NULL); + isc_managers_create(ctxs_mctx, 1, 0, &ctxs_netmgr, &ctxs_taskmgr, + &ctxs_timermgr); result = isc_app_ctxstart(ctxs_actx); if (result != ISC_R_SUCCESS) { diff --git a/bin/tests/system/statistics/tests.sh b/bin/tests/system/statistics/tests.sh index b019dfac69..8db7741013 100644 --- a/bin/tests/system/statistics/tests.sh +++ b/bin/tests/system/statistics/tests.sh @@ -201,7 +201,6 @@ if $FEATURETEST --have-libxml2 && [ -x "${CURL}" ] && [ -x "${XSLTPROC}" ] ; th # grep "

Glue cache statistics

" xsltproc.out.${n} >/dev/null || ret=1 grep "

View _default" xsltproc.out.${n} >/dev/null || ret=1 grep "

Zone example" xsltproc.out.${n} >/dev/null || ret=1 - grep "

Network Status

" xsltproc.out.${n} >/dev/null || ret=1 grep "

Task Manager Configuration

" xsltproc.out.${n} >/dev/null || ret=1 grep "

Tasks

" xsltproc.out.${n} >/dev/null || ret=1 grep "

Memory Usage Summary

" xsltproc.out.${n} >/dev/null || ret=1 diff --git a/bin/tests/system/tkey/keycreate.c b/bin/tests/system/tkey/keycreate.c index 085648c178..cfd5a9aeac 100644 --- a/bin/tests/system/tkey/keycreate.c +++ b/bin/tests/system/tkey/keycreate.c @@ -219,7 +219,7 @@ main(int argc, char *argv[]) { RUNCHECK(dst_lib_init(mctx, NULL)); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); RUNCHECK(isc_task_create(taskmgr, 0, &task)); RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); @@ -254,7 +254,7 @@ main(int argc, char *argv[]) { dns_dispatchmgr_detach(&dispatchmgr); isc_task_shutdown(task); isc_task_detach(&task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); dst_key_free(&ourkey); dns_tsigkey_detach(&initialkey); diff --git a/bin/tests/system/tkey/keydelete.c b/bin/tests/system/tkey/keydelete.c index 6175748957..69bec035fa 100644 --- a/bin/tests/system/tkey/keydelete.c +++ b/bin/tests/system/tkey/keydelete.c @@ -163,7 +163,7 @@ main(int argc, char **argv) { RUNCHECK(dst_lib_init(mctx, NULL)); - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); RUNCHECK(isc_task_create(taskmgr, 0, &task)); RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); @@ -197,7 +197,7 @@ main(int argc, char **argv) { dns_dispatchmgr_detach(&dispatchmgr); isc_task_shutdown(task); isc_task_detach(&task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); dns_tsigkeyring_detach(&ring); diff --git a/bin/tests/test_client.c b/bin/tests/test_client.c index 743adf44e6..339e664c3a 100644 --- a/bin/tests/test_client.c +++ b/bin/tests/test_client.c @@ -306,7 +306,7 @@ setup(void) { isc_mem_create(&mctx); - isc_managers_create(mctx, workers, 0, 0, &netmgr, NULL, NULL, NULL); + isc_managers_create(mctx, workers, 0, &netmgr, NULL, 
NULL); } static void @@ -315,7 +315,7 @@ teardown(void) { close(out); } - isc_managers_destroy(&netmgr, NULL, NULL, NULL); + isc_managers_destroy(&netmgr, NULL, NULL); isc_mem_destroy(&mctx); if (tls_ctx) { isc_tlsctx_free(&tls_ctx); diff --git a/bin/tests/test_server.c b/bin/tests/test_server.c index 5ef0428eb9..9e28fe8b1c 100644 --- a/bin/tests/test_server.c +++ b/bin/tests/test_server.c @@ -187,12 +187,12 @@ setup(void) { isc_mem_create(&mctx); - isc_managers_create(mctx, workers, 0, 0, &netmgr, NULL, NULL, NULL); + isc_managers_create(mctx, workers, 0, &netmgr, NULL, NULL); } static void teardown(void) { - isc_managers_destroy(&netmgr, NULL, NULL, NULL); + isc_managers_destroy(&netmgr, NULL, NULL); isc_mem_destroy(&mctx); if (tls_ctx) { isc_tlsctx_free(&tls_ctx); diff --git a/bin/tools/mdig.c b/bin/tools/mdig.c index 7f59d5cc01..364b18cbeb 100644 --- a/bin/tools/mdig.c +++ b/bin/tools/mdig.c @@ -2115,7 +2115,7 @@ main(int argc, char *argv[]) { fatal("can't choose between IPv4 and IPv6"); } - isc_managers_create(mctx, 1, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 1, 0, &netmgr, &taskmgr, NULL); RUNCHECK(isc_task_create(taskmgr, 0, &task)); RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); @@ -2175,7 +2175,7 @@ main(int argc, char *argv[]) { isc_task_shutdown(task); isc_task_detach(&task); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); dst_lib_destroy(); diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst index 1d5ec4f522..07f793e731 100644 --- a/doc/arm/reference.rst +++ b/doc/arm/reference.rst @@ -3046,13 +3046,7 @@ system. most two places after the decimal point are significant. ``reserved-sockets`` - This sets the number of file descriptors reserved for TCP, stdio, etc. This - needs to be big enough to cover the number of interfaces ``named`` - listens on plus ``tcp-clients``, as well as to provide room for - outgoing TCP queries and incoming zone transfers. 
The default is - ``512``. The minimum value is ``128`` and the maximum value is - ``128`` fewer than maxsockets (-S). This option may be removed in the - future. + This option is deprecated and no longer has any effect. ``max-cache-size`` This sets the maximum amount of memory to use for an individual cache diff --git a/doc/dev/dev.md b/doc/dev/dev.md index c68f445ede..086c5a5d40 100644 --- a/doc/dev/dev.md +++ b/doc/dev/dev.md @@ -1370,32 +1370,6 @@ queue, the specified function will be called. Examples: -`isc_socket_recv()` calls the `recv()` system call asynchronously: rather -than waiting for data, it returns immediately, but it sets up an event to -be triggered when the `recv()` call completes; BIND can now do other work -instead of waiting for I/O. Once the `recv()` is finished, the -associated event is triggered. - - - /* - * Function to handle a completed recv() - */ - static void - recvdone(isc_task_t *task, isc_event_t *event) { - /* Arguments are in event->ev_arg. */ - } - - ... - - /* - * Call recv() on socket 'sock', put results into 'region', - * minimum read size 1, and call recvdone() with NULL as - * argument. (Note: 'sock' is already associated with a - * particular task, so that doesn't need to be specified - * here.) - */ - isc_socket_recv(sock, &region, 1, recvdone, NULL); - A timer is set for a specified time in the future, and the event will be triggered at that time. 
diff --git a/doc/man/named.8in b/doc/man/named.8in index 10647c1641..8d24841f90 100644 --- a/doc/man/named.8in +++ b/doc/man/named.8in @@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] named \- Internet domain name server .SH SYNOPSIS .sp -\fBnamed\fP [ [\fB\-4\fP] | [\fB\-6\fP] ] [\fB\-c\fP config\-file] [\fB\-d\fP debug\-level] [\fB\-D\fP string] [\fB\-E\fP engine\-name] [\fB\-f\fP] [\fB\-g\fP] [\fB\-L\fP logfile] [\fB\-M\fP option] [\fB\-m\fP flag] [\fB\-n\fP #cpus] [\fB\-p\fP port] [\fB\-s\fP] [\fB\-S\fP #max\-socks] [\fB\-t\fP directory] [\fB\-U\fP #listeners] [\fB\-u\fP user] [\fB\-v\fP] [\fB\-V\fP] [\fB\-X\fP lock\-file] +\fBnamed\fP [ [\fB\-4\fP] | [\fB\-6\fP] ] [\fB\-c\fP config\-file] [\fB\-d\fP debug\-level] [\fB\-D\fP string] [\fB\-E\fP engine\-name] [\fB\-f\fP] [\fB\-g\fP] [\fB\-L\fP logfile] [\fB\-M\fP option] [\fB\-m\fP flag] [\fB\-n\fP #cpus] [\fB\-p\fP port] [\fB\-s\fP] [\fB\-t\fP directory] [\fB\-U\fP #listeners] [\fB\-u\fP user] [\fB\-v\fP] [\fB\-V\fP] [\fB\-X\fP lock\-file] .SH DESCRIPTION .sp \fBnamed\fP is a Domain Name System (DNS) server, part of the BIND 9 @@ -131,9 +131,7 @@ removed or changed in a future release. .INDENT 0.0 .TP .B \fB\-S #max\-socks\fP -This option allows \fBnamed\fP to use up to \fB#max\-socks\fP sockets. The default value is -21000 on systems built with default configuration options, and 4096 -on systems built with \fBconfigure \-\-with\-tuning=small\fP\&. +This option is deprecated and no longer has any function. 
.UNINDENT .sp \fBWARNING:\fP diff --git a/doc/man/named.conf.5in b/doc/man/named.conf.5in index b5bcfcccf8..7e129e4bf1 100644 --- a/doc/man/named.conf.5in +++ b/doc/man/named.conf.5in @@ -458,7 +458,7 @@ options { request\-ixfr boolean; request\-nsid boolean; require\-server\-cookie boolean; - reserved\-sockets integer; + reserved\-sockets integer;// deprecated resolver\-nonbackoff\-tries integer; resolver\-query\-timeout integer; resolver\-retry\-interval integer; diff --git a/doc/misc/options b/doc/misc/options index fb9db8c4c6..02b6f7b609 100644 --- a/doc/misc/options +++ b/doc/misc/options @@ -316,7 +316,7 @@ options { request-ixfr ; request-nsid ; require-server-cookie ; - reserved-sockets ; + reserved-sockets ; // deprecated resolver-nonbackoff-tries ; resolver-query-timeout ; resolver-retry-interval ; diff --git a/doc/misc/options.active b/doc/misc/options.active index c8d71c5532..491a025ed4 100644 --- a/doc/misc/options.active +++ b/doc/misc/options.active @@ -314,7 +314,7 @@ options { request-ixfr ; request-nsid ; require-server-cookie ; - reserved-sockets ; + reserved-sockets ; // deprecated resolver-nonbackoff-tries ; resolver-query-timeout ; resolver-retry-interval ; diff --git a/doc/misc/options.grammar.rst b/doc/misc/options.grammar.rst index 8adff8b414..2c440420f1 100644 --- a/doc/misc/options.grammar.rst +++ b/doc/misc/options.grammar.rst @@ -234,7 +234,7 @@ request-ixfr ; request-nsid ; require-server-cookie ; - reserved-sockets ; + reserved-sockets ; // deprecated resolver-nonbackoff-tries ; resolver-query-timeout ; resolver-retry-interval ; diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index 5e82f9c4b0..98874357fd 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -79,6 +79,9 @@ Feature Changes including ``nsupdate``, ``delv``, ``mdig``, to send all outgoing DNS queries and requests. 
:gl:`#2401` +- Because the old socket manager API has been removed, "socketmgr" + statistics are no longer reported by the statistics channel. :gl:`#2926` + - Zone transfers over TLS (XoT) now need "dot" Application-Layer Protocol Negotiation (ALPN) tag to be negotiated, as required by the RFC 9103. :gl: `#2794` diff --git a/lib/dns/client.c b/lib/dns/client.c index 1e18f96e69..32f146cd4e 100644 --- a/lib/dns/client.c +++ b/lib/dns/client.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/dns/include/dns/dispatch.h b/lib/dns/include/dns/dispatch.h index a53e66e746..83f40237f8 100644 --- a/lib/dns/include/dns/dispatch.h +++ b/lib/dns/include/dns/dispatch.h @@ -33,8 +33,7 @@ * * Security: * - *\li Depends on the isc_socket_t and dns_message_t for prevention of - * buffer overruns. + *\li Depends on dns_message_t for prevention of buffer overruns. * * Standards: * @@ -52,7 +51,6 @@ #include #include #include -#include #include #include @@ -187,7 +185,7 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, const isc_sockaddr_t *destaddr, isc_dscp_t dscp, dns_dispatch_t **dispp); /*%< - * Create a new dns_dispatch and attach it to the provided isc_socket_t. + * Create a new TCP dns_dispatch. 
* * Requires: * diff --git a/lib/dns/include/dns/resolver.h b/lib/dns/include/dns/resolver.h index 63edb07774..3aa0ab80ea 100644 --- a/lib/dns/include/dns/resolver.h +++ b/lib/dns/include/dns/resolver.h @@ -46,8 +46,8 @@ #include #include +#include #include -#include #include #include diff --git a/lib/dns/rbt.c b/lib/dns/rbt.c index b8e8a6c9e6..cf57ef20fb 100644 --- a/lib/dns/rbt.c +++ b/lib/dns/rbt.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 8cc29119f9..08b9c29201 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/dns/request.c b/lib/dns/request.c index 98a389b594..c3516be976 100644 --- a/lib/dns/request.c +++ b/lib/dns/request.c @@ -77,7 +77,6 @@ struct dns_request { dns_requestmgr_t *requestmgr; isc_buffer_t *tsig; dns_tsigkey_t *tsigkey; - isc_socketevent_t sendevent; isc_sockaddr_t destaddr; unsigned int timeout; unsigned int udpcount; diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 8e2e85ae62..04330a3206 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -237,11 +237,10 @@ typedef struct query { isc_buffer_t buffer; isc_buffer_t *tsig; dns_tsigkey_t *tsigkey; - isc_socketevent_t sendevent; isc_dscp_t dscp; int ednsversion; unsigned int options; - isc_sockeventattr_t attributes; + unsigned int attributes; unsigned int udpsize; unsigned char data[512]; } resquery_t; diff --git a/lib/dns/tests/dispatch_test.c b/lib/dns/tests/dispatch_test.c index 93eac9f5ae..d965a0c3b8 100644 --- a/lib/dns/tests/dispatch_test.c +++ b/lib/dns/tests/dispatch_test.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -143,8 +142,7 @@ _setup(void **state) { close(sock); /* Create a secondary network manager */ - isc_managers_create(dt_mctx, ncpus, 0, 0, &connect_nm, NULL, NULL, - NULL); + isc_managers_create(dt_mctx, ncpus, 0, &connect_nm, NULL, 
NULL); isc_nm_settimeouts(netmgr, T_SERVER_INIT, T_SERVER_IDLE, T_SERVER_KEEPALIVE, T_SERVER_ADVERTISED); @@ -170,7 +168,7 @@ _teardown(void **state) { uv_sem_destroy(&sem); - isc_managers_destroy(&connect_nm, NULL, NULL, NULL); + isc_managers_destroy(&connect_nm, NULL, NULL); assert_null(connect_nm); dns_test_end(); diff --git a/lib/dns/tests/dnstest.c b/lib/dns/tests/dnstest.c index e415e7a0e0..de646cb565 100644 --- a/lib/dns/tests/dnstest.c +++ b/lib/dns/tests/dnstest.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -99,7 +98,7 @@ cleanup_managers(void) { isc_managers_destroy(netmgr == NULL ? NULL : &netmgr, taskmgr == NULL ? NULL : &taskmgr, - timermgr == NULL ? NULL : &timermgr, NULL); + timermgr == NULL ? NULL : &timermgr); if (app_running) { isc_app_finish(); @@ -111,8 +110,7 @@ create_managers(void) { isc_result_t result; ncpus = isc_os_ncpus(); - isc_managers_create(dt_mctx, ncpus, 0, 0, &netmgr, &taskmgr, &timermgr, - NULL); + isc_managers_create(dt_mctx, ncpus, 0, &netmgr, &taskmgr, &timermgr); CHECK(isc_task_create(taskmgr, 0, &maintask)); return (ISC_R_SUCCESS); diff --git a/lib/dns/tests/rbt_test.c b/lib/dns/tests/rbt_test.c index 3839ebc0cb..ad761efd91 100644 --- a/lib/dns/tests/rbt_test.c +++ b/lib/dns/tests/rbt_test.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/dns/tests/resolver_test.c b/lib/dns/tests/resolver_test.c index 866498e35e..8113693eac 100644 --- a/lib/dns/tests/resolver_test.c +++ b/lib/dns/tests/resolver_test.c @@ -23,8 +23,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/lib/isc/Makefile.am b/lib/isc/Makefile.am index 639b1b7b2d..70358f8a82 100644 --- a/lib/isc/Makefile.am +++ b/lib/isc/Makefile.am @@ -82,7 +82,6 @@ libisc_la_HEADERS = \ include/isc/serial.h \ include/isc/siphash.h \ include/isc/sockaddr.h \ - include/isc/socket.h \ include/isc/stat.h \ include/isc/stats.h \ include/isc/stdatomic.h \ 
@@ -186,8 +185,6 @@ libisc_la_SOURCES = \ serial.c \ siphash.c \ sockaddr.c \ - socket.c \ - socket_p.h \ stats.c \ stdio.c \ stdtime.c \ diff --git a/lib/isc/httpd.c b/lib/isc/httpd.c index 5daf349426..b71d323823 100644 --- a/lib/isc/httpd.c +++ b/lib/isc/httpd.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/isc/include/isc/managers.h b/lib/isc/include/isc/managers.h index 3b3227b09e..8543dbdcf9 100644 --- a/lib/isc/include/isc/managers.h +++ b/lib/isc/include/isc/managers.h @@ -13,7 +13,6 @@ #include #include -#include #include #include @@ -21,10 +20,9 @@ typedef struct isc_managers isc_managers_t; isc_result_t isc_managers_create(isc_mem_t *mctx, size_t workers, size_t quantum, - size_t sockets, isc_nm_t **netmgrp, - isc_taskmgr_t **taskmgrp, isc_timermgr_t **timermgrp, - isc_socketmgr_t **socketmgrp); + isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, + isc_timermgr_t **timermgrp); void isc_managers_destroy(isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, - isc_timermgr_t **timermgrp, isc_socketmgr_t **socketmgrp); + isc_timermgr_t **timermgrp); diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 980bff2a14..b517eaaec2 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -232,6 +232,18 @@ isc_nm_udpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, * 'cb'. */ +isc_result_t +isc_nm_routeconnect(isc_nm_t *mgr, isc_nm_cb_t cb, void *cbarg, + size_t extrahandlesize); +/*%< + * Open a route/netlink socket and call 'cb', so the caller can + * begin listening for interface changes. This behaves similarly to + * isc_nm_udpconnect(). + * + * Returns ISC_R_NOTIMPLEMENTED on systems where route/netlink sockets + * are not supported.
+ */ + void isc_nm_stoplistening(isc_nmsocket_t *sock); /*%< diff --git a/lib/isc/include/isc/socket.h b/lib/isc/include/isc/socket.h deleted file mode 100644 index 10ac659a8b..0000000000 --- a/lib/isc/include/isc/socket.h +++ /dev/null @@ -1,842 +0,0 @@ -/* - * Copyright (C) Internet Systems Consortium, Inc. ("ISC") - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, you can obtain one at https://mozilla.org/MPL/2.0/. - * - * See the COPYRIGHT file distributed with this work for additional - * information regarding copyright ownership. - */ - -#pragma once - -/***** -***** Module Info -*****/ - -/*! \file isc/socket.h - * \brief Provides TCP and UDP sockets for network I/O. The sockets are event - * sources in the task system. - * - * When I/O completes, a completion event for the socket is posted to the - * event queue of the task which requested the I/O. - * - * \li MP: - * The module ensures appropriate synchronization of data structures it - * creates and manipulates. - * Clients of this module must not be holding a socket's task's lock when - * making a call that affects that socket. Failure to follow this rule - * can result in deadlock. - * The caller must ensure that isc_socketmgr_destroy() is called only - * once for a given manager. - * - * \li Reliability: - * No anticipated impact. - * - * \li Resources: - * TBS - * - * \li Security: - * No anticipated impact. - * - * \li Standards: - * None. - */ - -/*** - *** Imports - ***/ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -ISC_LANG_BEGINDECLS - -/*** - *** Constants - ***/ - -/*% - * Maximum number of buffers in a scatter/gather read/write. The operating - * system in use must support at least this number (plus one on some.) - */ -#define ISC_SOCKET_MAXSCATTERGATHER 8 - -/*@{*/ -/*! 
- * Socket options: - * - * _REUSEADDRESS: Set SO_REUSEADDR prior to calling bind(), - * if a non-zero port is specified (applies to - * AF_INET and AF_INET6). - */ -typedef enum { - ISC_SOCKET_REUSEADDRESS = 0x01U, -} isc_socket_options_t; -/*@}*/ - -/*@{*/ -/*! - * _ATTACHED: Internal use only. - * _TRUNC: Packet was truncated on receive. - * _CTRUNC: Packet control information was truncated. This can - * indicate that the packet is not complete, even though - * all the data is valid. - * _TIMESTAMP: The timestamp member is valid. - * _PKTINFO: The pktinfo member is valid. - * _MULTICAST: The UDP packet was received via a multicast transmission. - * _DSCP: The UDP DSCP value is valid. - * _USEMINMTU: Set the per packet IPV6_USE_MIN_MTU flag. - */ -typedef enum { - ISC_SOCKEVENTATTR_ATTACHED = 0x10000000U, /* internal */ - ISC_SOCKEVENTATTR_TRUNC = 0x00800000U, /* public */ - ISC_SOCKEVENTATTR_CTRUNC = 0x00400000U, /* public */ - ISC_SOCKEVENTATTR_TIMESTAMP = 0x00200000U, /* public */ - ISC_SOCKEVENTATTR_PKTINFO = 0x00100000U, /* public */ - ISC_SOCKEVENTATTR_MULTICAST = 0x00080000U, /* public */ - ISC_SOCKEVENTATTR_DSCP = 0x00040000U, /* public */ - ISC_SOCKEVENTATTR_USEMINMTU = 0x00020000U /* public */ -} isc_sockeventattr_t; -/*@}*/ - -/*** - *** Types - ***/ - -struct isc_socketevent { - ISC_EVENT_COMMON(isc_socketevent_t); - isc_result_t result; /*%< OK, EOF, whatever else */ - unsigned int minimum; /*%< minimum i/o for event */ - unsigned int n; /*%< bytes read or written */ - unsigned int offset; /*%< offset into buffer list */ - isc_region_t region; /*%< for single-buffer i/o */ - isc_sockaddr_t address; /*%< source address */ - isc_time_t timestamp; /*%< timestamp of packet recv */ - struct in6_pktinfo pktinfo; /*%< ipv6 pktinfo */ - isc_sockeventattr_t attributes; /*%< see isc_sockeventattr_t - * enum */ - isc_eventdestructor_t destroy; /*%< original destructor */ - unsigned int dscp; /*%< UDP dscp value */ -}; - -typedef struct isc_socket_newconnev 
isc_socket_newconnev_t; -struct isc_socket_newconnev { - ISC_EVENT_COMMON(isc_socket_newconnev_t); - isc_socket_t *newsocket; - isc_result_t result; /*%< OK, EOF, whatever else */ - isc_sockaddr_t address; /*%< source address */ -}; - -typedef struct isc_socket_connev isc_socket_connev_t; -struct isc_socket_connev { - ISC_EVENT_COMMON(isc_socket_connev_t); - isc_result_t result; /*%< OK, EOF, whatever else */ -}; - -#define ISC_SOCKEVENT_ANYEVENT (0) -#define ISC_SOCKEVENT_RECVDONE (ISC_EVENTCLASS_SOCKET + 1) -#define ISC_SOCKEVENT_SENDDONE (ISC_EVENTCLASS_SOCKET + 2) -#define ISC_SOCKEVENT_NEWCONN (ISC_EVENTCLASS_SOCKET + 3) -#define ISC_SOCKEVENT_CONNECT (ISC_EVENTCLASS_SOCKET + 4) - -/* - * Internal events. - */ -#define ISC_SOCKEVENT_INTR (ISC_EVENTCLASS_SOCKET + 256) -#define ISC_SOCKEVENT_INTW (ISC_EVENTCLASS_SOCKET + 257) - -typedef enum { - isc_sockettype_udp = 1, - isc_sockettype_tcp = 2, - isc_sockettype_unix = 3, - isc_sockettype_raw = 4 -} isc_sockettype_t; - -/*@{*/ -/*! - * How a socket should be shutdown in isc_socket_shutdown() calls. - */ -#define ISC_SOCKSHUT_RECV 0x00000001 /*%< close read side */ -#define ISC_SOCKSHUT_SEND 0x00000002 /*%< close write side */ -#define ISC_SOCKSHUT_ALL 0x00000003 /*%< close them all */ -/*@}*/ - -/*@{*/ -/*! - * What I/O events to cancel in isc_socket_cancel() calls. - */ -#define ISC_SOCKCANCEL_RECV 0x00000001 /*%< cancel recv */ -#define ISC_SOCKCANCEL_SEND 0x00000002 /*%< cancel send */ -#define ISC_SOCKCANCEL_ACCEPT 0x00000004 /*%< cancel accept */ -#define ISC_SOCKCANCEL_CONNECT 0x00000008 /*%< cancel connect */ -#define ISC_SOCKCANCEL_ALL 0x0000000f /*%< cancel everything */ -/*@}*/ - -/*@{*/ -/*! - * Flags for isc_socket_send() and isc_socket_recv() calls. 
- */ -#define ISC_SOCKFLAG_IMMEDIATE 0x00000001 /*%< send event only if needed */ -#define ISC_SOCKFLAG_NORETRY 0x00000002 /*%< drop failed UDP sends */ -/*@}*/ - -/*** - *** Socket and Socket Manager Functions - *** - *** Note: all Ensures conditions apply only if the result is success for - *** those functions which return an isc_result. - ***/ - -isc_result_t -isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, - isc_socket_t **socketp); -/*%< - * Create a new 'type' socket managed by 'manager'. - * - * Note: - * - *\li 'pf' is the desired protocol family, e.g. PF_INET or PF_INET6. - * - * Requires: - * - *\li 'manager' is a valid manager - * - *\li 'socketp' is a valid pointer, and *socketp == NULL - * - * Ensures: - * - * '*socketp' is attached to the newly created socket - * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_NOMEMORY - *\li #ISC_R_NORESOURCES - *\li #ISC_R_UNEXPECTED - */ - -void -isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how); -/*%< - * Cancel pending I/O of the type specified by "how". - * - * Note: if "task" is NULL, then the cancel applies to all tasks using the - * socket. - * - * Requires: - * - * \li "socket" is a valid socket - * - * \li "task" is NULL or a valid task - * - * "how" is a bitmask describing the type of cancellation to perform. - * The type ISC_SOCKCANCEL_ALL will cancel all pending I/O on this - * socket. - * - * \li ISC_SOCKCANCEL_RECV: - * Cancel pending isc_socket_recv() calls. - * - * \li ISC_SOCKCANCEL_SEND: - * Cancel pending isc_socket_send() and isc_socket_sendto() calls. - * - * \li ISC_SOCKCANCEL_ACCEPT: - * Cancel pending isc_socket_accept() calls. - * - * \li ISC_SOCKCANCEL_CONNECT: - * Cancel pending isc_socket_connect() call. - */ - -void -isc_socket_shutdown(isc_socket_t *sock, unsigned int how); -/*%< - * Shutdown 'socket' according to 'how'. - * - * Requires: - * - * \li 'socket' is a valid socket. - * - * \li 'task' is NULL or is a valid task. 
- * - * \li If 'how' is 'ISC_SOCKSHUT_RECV' or 'ISC_SOCKSHUT_ALL' then - * - * The read queue must be empty. - * - * No further read requests may be made. - * - * \li If 'how' is 'ISC_SOCKSHUT_SEND' or 'ISC_SOCKSHUT_ALL' then - * - * The write queue must be empty. - * - * No further write requests may be made. - */ - -void -isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp); -/*%< - * Attach *socketp to socket. - * - * Requires: - * - * \li 'socket' is a valid socket. - * - * \li 'socketp' points to a NULL socket. - * - * Ensures: - * - * \li *socketp is attached to socket. - */ - -void -isc_socket_detach(isc_socket_t **socketp); -/*%< - * Detach *socketp from its socket. - * - * Requires: - * - * \li 'socketp' points to a valid socket. - * - * \li If '*socketp' is the last reference to the socket, - * then: - * - * There must be no pending I/O requests. - * - * Ensures: - * - * \li *socketp is NULL. - * - * \li If '*socketp' is the last reference to the socket, - * then: - * - * The socket will be shutdown (both reading and writing) - * for all tasks. - * - * All resources used by the socket have been freed - */ - -isc_result_t -isc_socket_open(isc_socket_t *sock); -/*%< - * Open a new socket file descriptor of the given socket structure. It simply - * opens a new descriptor; all of the other parameters including the socket - * type are inherited from the existing socket. This function is provided to - * avoid overhead of destroying and creating sockets when many short-lived - * sockets are frequently opened and closed. When the efficiency is not an - * issue, it should be safer to detach the unused socket and re-create a new - * one. This optimization may not be available for some systems, in which - * case this function will return ISC_R_NOTIMPLEMENTED and must not be used. - * - * Requires: - * - * \li there must be no other reference to this socket. 
- * - * \li 'socket' is a valid and previously closed by isc_socket_close() - * - * Returns: - * Same as isc_socket_create(). - * \li ISC_R_NOTIMPLEMENTED - */ - -isc_result_t -isc_socket_close(isc_socket_t *sock); -/*%< - * Close a socket file descriptor of the given socket structure. This function - * is provided as an alternative to destroying an unused socket when overhead - * destroying/re-creating sockets can be significant, and is expected to be - * used with isc_socket_open(). This optimization may not be available for some - * systems, in which case this function will return ISC_R_NOTIMPLEMENTED and - * must not be used. - * - * Requires: - * - * \li The socket must have a valid descriptor. - * - * \li There must be no other reference to this socket. - * - * \li There must be no pending I/O requests. - * - * Returns: - * \li #ISC_R_NOTIMPLEMENTED - */ - -isc_result_t -isc_socket_bind(isc_socket_t *sock, const isc_sockaddr_t *addressp, - isc_socket_options_t options); -/*%< - * Bind 'socket' to '*addressp'. - * - * Requires: - * - * \li 'socket' is a valid socket - * - * \li 'addressp' points to a valid isc_sockaddr. - * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_NOPERM - * \li ISC_R_ADDRNOTAVAIL - * \li ISC_R_ADDRINUSE - * \li ISC_R_BOUND - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_filter(isc_socket_t *sock, const char *filter); -/*%< - * Inform the kernel that it should perform accept filtering. - * If filter is NULL the current filter will be removed. - */ - -isc_result_t -isc_socket_listen(isc_socket_t *sock, unsigned int backlog); -/*%< - * Set listen mode on the socket. After this call, the only function that - * can be used (other than attach and detach) is isc_socket_accept(). - * - * Notes: - * - * \li 'backlog' is as in the UNIX system call listen() and may be - * ignored by non-UNIX implementations. - * - * \li If 'backlog' is zero, a reasonable system default is used, usually - * SOMAXCONN. 
- * - * Requires: - * - * \li 'socket' is a valid, bound TCP socket or a valid, bound UNIX socket. - * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_accept(isc_socket_t *sock, isc_task_t *task, isc_taskaction_t action, - void *arg); -/*%< - * Queue accept event. When a new connection is received, the task will - * get an ISC_SOCKEVENT_NEWCONN event with the sender set to the listen - * socket. The new socket structure is sent inside the isc_socket_newconnev_t - * event type, and is attached to the task 'task'. - * - * REQUIRES: - * \li 'socket' is a valid TCP socket that isc_socket_listen() was called - * on. - * - * \li 'task' is a valid task - * - * \li 'action' is a valid action - * - * RETURNS: - * \li ISC_R_SUCCESS - * \li ISC_R_NOMEMORY - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_connect(isc_socket_t *sock, const isc_sockaddr_t *addressp, - isc_task_t *task, isc_taskaction_t action, void *arg); -/*%< - * Connect 'socket' to peer with address *saddr. When the connection - * succeeds, or when an error occurs, a CONNECT event with action 'action' - * and arg 'arg' will be posted to the event queue for 'task'. - * - * Requires: - * - * \li 'socket' is a valid TCP socket - * - * \li 'addressp' points to a valid isc_sockaddr - * - * \li 'task' is a valid task - * - * \li 'action' is a valid action - * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_NOMEMORY - * \li ISC_R_UNEXPECTED - * - * Posted event's result code: - * - * \li ISC_R_SUCCESS - * \li ISC_R_TIMEDOUT - * \li ISC_R_CONNREFUSED - * \li ISC_R_NETUNREACH - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp); -/*%< - * Get the name of the peer connected to 'socket'. - * - * Requires: - * - * \li 'socket' is a valid TCP socket. 
- * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_TOOSMALL - * \li ISC_R_UNEXPECTED - */ - -isc_result_t -isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp); -/*%< - * Get the name of 'socket'. - * - * Requires: - * - * \li 'socket' is a valid socket. - * - * Returns: - * - * \li ISC_R_SUCCESS - * \li ISC_R_TOOSMALL - * \li ISC_R_UNEXPECTED - */ - -/*@{*/ -isc_result_t -isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, - isc_task_t *task, isc_taskaction_t action, void *arg); - -isc_result_t -isc_socket_recv2(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, - isc_task_t *task, isc_socketevent_t *event, - unsigned int flags); - -/*! - * Receive from 'socket', storing the results in region. - * - * Notes: - * - *\li Let 'length' refer to the length of 'region' or to the sum of all - * available regions in the list of buffers '*buflist'. - * - *\li If 'minimum' is non-zero and at least that many bytes are read, - * the completion event will be posted to the task 'task.' If minimum - * is zero, the exact number of bytes requested in the region must - * be read for an event to be posted. This only makes sense for TCP - * connections, and is always set to 1 byte for UDP. - * - *\li The read will complete when the desired number of bytes have been - * read, if end-of-input occurs, or if an error occurs. A read done - * event with the given 'action' and 'arg' will be posted to the - * event queue of 'task'. - * - *\li The caller may not modify 'region', the buffers which are passed - * into this function, or any data they refer to until the completion - * event is received. - * - *\li For isc_socket_recv2(): - * 'event' is not NULL, and the non-socket specific fields are - * expected to be initialized. - * - *\li For isc_socket_recv2(): - * The only defined value for 'flags' is ISC_SOCKFLAG_IMMEDIATE. 
If - * set and the operation completes, the return value will be - * ISC_R_SUCCESS and the event will be filled in and not sent. If the - * operation does not complete, the return value will be - * ISC_R_INPROGRESS and the event will be sent when the operation - * completes. - * - * Requires: - * - *\li 'socket' is a valid, bound socket. - * - *\li For isc_socket_recv(): - * 'region' is a valid region - * - *\li For isc_socket_recvv(): - * 'buflist' is non-NULL, and '*buflist' contain at least one buffer. - * - *\li 'task' is a valid task - * - *\li For isc_socket_recv() and isc_socket_recvv(): - * action != NULL and is a valid action - * - *\li For isc_socket_recv2(): - * event != NULL - * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_INPROGRESS - *\li #ISC_R_NOMEMORY - *\li #ISC_R_UNEXPECTED - * - * Event results: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_UNEXPECTED - *\li XXX needs other net-type errors - */ -/*@}*/ - -/*@{*/ -isc_result_t -isc_socket_send(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - isc_taskaction_t action, void *arg); -isc_result_t -isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - isc_taskaction_t action, void *arg, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo); -isc_result_t -isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, - isc_socketevent_t *event, unsigned int flags); - -/*! - * Send the contents of 'region' to the socket's peer. - * - * Notes: - * - *\li Shutting down the requestor's task *may* result in any - * still pending writes being dropped or completed, depending on the - * underlying OS implementation. - * - *\li If 'action' is NULL, then no completion event will be posted. - * - *\li The caller may not modify 'region', the buffers which are passed - * into this function, or any data they refer to until the completion - * event is received. 
- * - *\li For isc_socket_sendto2(): - * 'event' is not NULL, and the non-socket specific fields are - * expected to be initialized. - * - *\li For isc_socket_sendto2(): - * The only defined values for 'flags' are ISC_SOCKFLAG_IMMEDIATE - * and ISC_SOCKFLAG_NORETRY. - * - *\li If ISC_SOCKFLAG_IMMEDIATE is set and the operation completes, the - * return value will be ISC_R_SUCCESS and the event will be filled - * in and not sent. If the operation does not complete, the return - * value will be ISC_R_INPROGRESS and the event will be sent when - * the operation completes. - * - *\li ISC_SOCKFLAG_NORETRY can only be set for UDP sockets. If set - * and the send operation fails due to a transient error, the send - * will not be retried and the error will be indicated in the event. - * Using this option along with ISC_SOCKFLAG_IMMEDIATE allows the caller - * to specify a region that is allocated on the stack. - * - * Requires: - * - *\li 'socket' is a valid, bound socket. - * - *\li For isc_socket_send(): - * 'region' is a valid region - * - *\li For isc_socket_sendv() and isc_socket_sendtov(): - * 'buflist' is non-NULL, and '*buflist' contain at least one buffer. - * - *\li 'task' is a valid task - * - *\li For isc_socket_sendv(), isc_socket_sendtov(), isc_socket_send(), and - * isc_socket_sendto(): - * action == NULL or is a valid action - * - *\li For isc_socket_sendto2(): - * event != NULL - * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_INPROGRESS - *\li #ISC_R_NOMEMORY - *\li #ISC_R_UNEXPECTED - * - * Event results: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_UNEXPECTED - *\li XXX needs other net-type errors - */ -/*@}*/ - -isc_result_t -isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp); -/*%< - * Returns in "*nsockp" the maximum number of sockets this manager may open. - * - * Requires: - * - *\li '*manager' is a valid isc_socketmgr_t. - *\li 'nsockp' is not NULL. 
- * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_NOTIMPLEMENTED - */ - -void -isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats); -/*%< - * Set a general socket statistics counter set 'stats' for 'manager'. - * - * Requires: - * \li 'manager' is valid, hasn't opened any socket, and doesn't have - * stats already set. - * - *\li stats is a valid statistics supporting socket statistics counters - * (see above). - */ - -isc_sockettype_t -isc_socket_gettype(isc_socket_t *sock); -/*%< - * Returns the socket type for "sock." - * - * Requires: - * - *\li "sock" is a valid socket. - */ - -/*@{*/ -void -isc_socket_ipv6only(isc_socket_t *sock, bool yes); -/*%< - * If the socket is an IPv6 socket set/clear the IPV6_IPV6ONLY socket - * option if the host OS supports this option. - * - * Requires: - *\li 'sock' is a valid socket. - */ -/*@}*/ - -void -isc_socket_dscp(isc_socket_t *sock, isc_dscp_t dscp); -/*%< - * Sets the Differentiated Services Code Point (DSCP) field for packets - * transmitted on this socket. If 'dscp' is -1, return immediately. - * - * Requires: - *\li 'sock' is a valid socket. - */ - -isc_socketevent_t * -isc_socket_socketevent(isc_mem_t *mctx, void *sender, isc_eventtype_t eventtype, - isc_taskaction_t action, void *arg); -/*%< - * Get a isc_socketevent_t to be used with isc_socket_sendto2(), etc. - */ - -void -isc_socket_cleanunix(const isc_sockaddr_t *addr, bool active); - -/*%< - * Cleanup UNIX domain sockets in the file-system. If 'active' is true - * then just unlink the socket. If 'active' is false try to determine - * if there is a listener of the socket or not. If no listener is found - * then unlink socket. - * - * Prior to unlinking the path is tested to see if it a socket. - * - * Note: there are a number of race conditions which cannot be avoided - * both in the filesystem and any application using UNIX domain - * sockets (e.g. 
socket is tested between bind() and listen(), - * the socket is deleted and replaced in the file-system between - * stat() and unlink()). - */ - -isc_result_t -isc_socket_permunix(const isc_sockaddr_t *sockaddr, uint32_t perm, - uint32_t owner, uint32_t group); -/*%< - * Set ownership and file permissions on the UNIX domain socket. - * - * Note: On Solaris this secures the directory containing - * the socket as Solaris do not honour the filesystem - * permissions on the socket. - * - * Requires: - * \li 'sockaddr' to be a valid UNIX domain sockaddr. - * - * Returns: - * \li #ISC_R_SUCCESS - * \li #ISC_R_FAILURE - */ - -void -isc_socket_setname(isc_socket_t *socket, const char *name, void *tag); -/*%< - * Set the name and optional tag for a socket. This allows tracking of the - * owner or purpose for this socket, and is useful for tracing and statistics - * reporting. - */ - -const char * -isc_socket_getname(isc_socket_t *socket); -/*%< - * Get the name associated with a socket, if any. - */ - -void * -isc_socket_gettag(isc_socket_t *socket); -/*%< - * Get the tag associated with a socket, if any. - */ - -int -isc_socket_getfd(isc_socket_t *socket); -/*%< - * Get the file descriptor associated with a socket - */ - -void -isc_socketmgr_setreserved(isc_socketmgr_t *mgr, uint32_t); -/*%< - * Temporary. For use by named only. - */ - -void -isc_socketmgr_maxudp(isc_socketmgr_t *mgr, unsigned int maxudp); -/*%< - * Test interface. Drop UDP packet > 'maxudp'. - */ - -bool -isc_socket_hasreuseport(void); -/*%< - * Return true if there is SO_REUSEPORT support - */ - -#ifdef HAVE_LIBXML2 -int -isc_socketmgr_renderxml(isc_socketmgr_t *mgr, void *writer0); -/*%< - * Render internal statistics and other state into the XML document. - */ -#endif /* HAVE_LIBXML2 */ - -#ifdef HAVE_JSON_C -isc_result_t -isc_socketmgr_renderjson(isc_socketmgr_t *mgr, void *stats0); -/*%< - * Render internal statistics and other state into JSON format. 
- */ -#endif /* HAVE_JSON_C */ - -/*%< - * See isc_socketmgr_create() above. - */ -typedef isc_result_t (*isc_socketmgrcreatefunc_t)(isc_mem_t *mctx, - isc_socketmgr_t **managerp); - -ISC_LANG_ENDDECLS diff --git a/lib/isc/include/isc/types.h b/lib/isc/include/isc/types.h index 78ed309f12..b68365cd93 100644 --- a/lib/isc/include/isc/types.h +++ b/lib/isc/include/isc/types.h @@ -77,13 +77,10 @@ typedef struct isc_rwlock isc_rwlock_t; /*%< Read Write Lock */ typedef struct isc_sockaddr isc_sockaddr_t; /*%< Socket Address */ typedef ISC_LIST(isc_sockaddr_t) isc_sockaddrlist_t; /*%< Socket Address List * */ -typedef struct isc_socket isc_socket_t; /*%< Socket */ -typedef struct isc_socketevent isc_socketevent_t; /*%< Socket Event */ -typedef struct isc_socketmgr isc_socketmgr_t; /*%< Socket Manager */ -typedef struct isc_stats isc_stats_t; /*%< Statistics */ -typedef int_fast64_t isc_statscounter_t; -typedef struct isc_symtab isc_symtab_t; /*%< Symbol Table */ -typedef struct isc_task isc_task_t; /*%< Task */ +typedef struct isc_stats isc_stats_t; /*%< Statistics */ +typedef int_fast64_t isc_statscounter_t; +typedef struct isc_symtab isc_symtab_t; /*%< Symbol Table */ +typedef struct isc_task isc_task_t; /*%< Task */ typedef ISC_LIST(isc_task_t) isc_tasklist_t; /*%< Task List */ typedef struct isc_taskmgr isc_taskmgr_t; /*%< Task Manager */ typedef struct isc_textregion isc_textregion_t; /*%< Text Region */ diff --git a/lib/isc/managers.c b/lib/isc/managers.c index 628dd33fa9..094e4c0eb8 100644 --- a/lib/isc/managers.c +++ b/lib/isc/managers.c @@ -14,18 +14,15 @@ #include #include "netmgr_p.h" -#include "socket_p.h" #include "task_p.h" #include "timer_p.h" isc_result_t isc_managers_create(isc_mem_t *mctx, size_t workers, size_t quantum, - size_t sockets, isc_nm_t **netmgrp, - isc_taskmgr_t **taskmgrp, isc_timermgr_t **timermgrp, - isc_socketmgr_t **socketmgrp) { + isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, + isc_timermgr_t **timermgrp) { isc_result_t result; 
isc_nm_t *netmgr = NULL; - isc_socketmgr_t *socketmgr = NULL; isc_taskmgr_t *taskmgr = NULL; isc_timermgr_t *timermgr = NULL; @@ -65,29 +62,16 @@ isc_managers_create(isc_mem_t *mctx, size_t workers, size_t quantum, *timermgrp = timermgr; } - REQUIRE(socketmgrp == NULL || *socketmgrp == NULL); - if (socketmgrp != NULL) { - result = isc__socketmgr_create(mctx, &socketmgr, sockets, - workers); - if (result != ISC_R_SUCCESS) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_socketmgr_create() failed: %s", - isc_result_totext(result)); - goto fail; - } - *socketmgrp = socketmgr; - } - return (ISC_R_SUCCESS); fail: - isc_managers_destroy(netmgrp, taskmgrp, timermgrp, socketmgrp); + isc_managers_destroy(netmgrp, taskmgrp, timermgrp); return (result); } void isc_managers_destroy(isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, - isc_timermgr_t **timermgrp, isc_socketmgr_t **socketmgrp) { + isc_timermgr_t **timermgrp) { /* * If we have a taskmgr to clean up, then we must also have a netmgr. */ @@ -137,8 +121,4 @@ isc_managers_destroy(isc_nm_t **netmgrp, isc_taskmgr_t **taskmgrp, INSIST(*timermgrp != NULL); isc__timermgr_destroy(timermgrp); } - if (socketmgrp != NULL) { - INSIST(*socketmgrp != NULL); - isc__socketmgr_destroy(socketmgrp); - } } diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c index be3f90821c..d37d788e87 100644 --- a/lib/isc/netmgr/http.c +++ b/lib/isc/netmgr/http.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index c776aa08b3..2c5305f37b 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -240,6 +240,8 @@ typedef enum isc__netievent_type { netievent_udpread, netievent_udpcancel, + netievent_routeconnect, + netievent_tcpconnect, netievent_tcpclose, netievent_tcpsend, @@ -967,6 +969,8 @@ struct isc_nmsocket { atomic_bool active; atomic_bool destroying; + bool route_sock; + /*% * Socket is closed if it's not active and all 
the possible * callbacks were fired, there are no active handles, etc. @@ -1351,6 +1355,12 @@ isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0); * Callback handlers for asynchronous UDP events (listen, stoplisten, send). */ +void +isc__nm_async_routeconnect(isc__networker_t *worker, isc__netievent_t *ev0); +/*%< + * Callback handler for route socket events. + */ + void isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, isc_nm_cb_t cb, void *cbarg); @@ -1880,6 +1890,8 @@ NETIEVENT_SOCKET_TYPE(tcpstartread); NETIEVENT_SOCKET_REQ_TYPE(tlssend); NETIEVENT_SOCKET_REQ_TYPE(udpconnect); +NETIEVENT_SOCKET_REQ_TYPE(routeconnect); + NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb); NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb); NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb); @@ -1946,6 +1958,8 @@ NETIEVENT_SOCKET_REQ_DECL(tcpsend); NETIEVENT_SOCKET_REQ_DECL(tlssend); NETIEVENT_SOCKET_REQ_DECL(udpconnect); +NETIEVENT_SOCKET_REQ_DECL(routeconnect); + NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb); NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb); NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb); diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 259e097da2..9b884e0a93 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -128,6 +128,13 @@ static const isc_statscounter_t unixstatsindex[] = { static thread_local int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN; +/* + * Set by the -T dscp option on the command line. If set to a value + * other than -1, we check to make sure DSCP values match it, and + * assert if not. (Not currently in use.) 
+ */ +int isc_dscp_check_value = -1; + static void nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG); static void @@ -913,6 +920,8 @@ process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) { NETIEVENT_CASE(udpcancel); NETIEVENT_CASE(udpclose); + NETIEVENT_CASE(routeconnect); + NETIEVENT_CASE(tcpaccept); NETIEVENT_CASE(tcpconnect); NETIEVENT_CASE(tcplisten); @@ -1072,6 +1081,7 @@ NETIEVENT_SOCKET_REQ_DEF(tcpconnect); NETIEVENT_SOCKET_REQ_DEF(tcpsend); NETIEVENT_SOCKET_REQ_DEF(tlssend); NETIEVENT_SOCKET_REQ_DEF(udpconnect); +NETIEVENT_SOCKET_REQ_DEF(routeconnect); NETIEVENT_SOCKET_REQ_RESULT_DEF(connectcb); NETIEVENT_SOCKET_REQ_RESULT_DEF(readcb); NETIEVENT_SOCKET_REQ_RESULT_DEF(sendcb); @@ -1447,12 +1457,8 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, REQUIRE(sock != NULL); REQUIRE(mgr != NULL); - REQUIRE(iface != NULL); - - family = iface->type.sa.sa_family; *sock = (isc_nmsocket_t){ .type = type, - .iface = *iface, .fd = -1, .ah_size = 32, .inactivehandles = isc_astack_new( @@ -1460,6 +1466,13 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, .inactivereqs = isc_astack_new( mgr->mctx, ISC_NM_REQS_STACK_SIZE) }; + if (iface != NULL) { + family = iface->type.sa.sa_family; + sock->iface = *iface; + } else { + family = AF_UNSPEC; + } + #if NETMGR_TRACE sock->backtrace_size = isc_backtrace(sock->backtrace, TRACE_SIZE); ISC_LINK_INIT(sock, active_link); @@ -1492,6 +1505,12 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, case AF_INET6: sock->statsindex = udp6statsindex; break; + case AF_UNSPEC: + /* + * Route sockets are AF_UNSPEC, and don't + * have stats counters. 
+ */ + break; default: INSIST(0); ISC_UNREACHABLE(); diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 3982d49e7f..e4b4b8926d 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -31,6 +31,32 @@ #include "netmgr-int.h" #include "uv-compat.h" +#ifdef HAVE_NET_ROUTE_H +#include +#if defined(RTM_VERSION) && defined(RTM_NEWADDR) && defined(RTM_DELADDR) +#define USE_ROUTE_SOCKET 1 +#define ROUTE_SOCKET_PF PF_ROUTE +#define ROUTE_SOCKET_PROTOCOL 0 +#define MSGHDR rt_msghdr +#define MSGTYPE rtm_type +#endif /* if defined(RTM_VERSION) && defined(RTM_NEWADDR) && \ + * defined(RTM_DELADDR) */ +#endif /* ifdef HAVE_NET_ROUTE_H */ + +#if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) +#include +#include +#if defined(RTM_NEWADDR) && defined(RTM_DELADDR) +#define USE_ROUTE_SOCKET 1 +#define USE_NETLINK 1 +#define ROUTE_SOCKET_PF PF_NETLINK +#define ROUTE_SOCKET_PROTOCOL NETLINK_ROUTE +#define MSGHDR nlmsghdr +#define MSGTYPE nlmsg_type +#endif /* if defined(RTM_NEWADDR) && defined(RTM_DELADDR) */ +#endif /* if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) \ + */ + static isc_result_t udp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, isc_sockaddr_t *peer); @@ -189,6 +215,197 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, return (result); } +#ifdef USE_ROUTE_SOCKET +static isc_result_t +route_socket(uv_os_sock_t *fdp) { + isc_result_t result; + uv_os_sock_t fd; +#ifdef USE_NETLINK + struct sockaddr_nl sa; + int r; +#endif + + result = isc__nm_socket(ROUTE_SOCKET_PF, SOCK_RAW, + ROUTE_SOCKET_PROTOCOL, &fd); + if (result != ISC_R_SUCCESS) { + return (result); + } + +#ifdef USE_NETLINK + sa.nl_family = PF_NETLINK; + sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR; + r = bind(fd, (struct sockaddr *)&sa, sizeof(sa)); + if (r < 0) { + isc__nm_closesocket(fd); + return (isc_errno_toresult(r)); + } +#endif + + *fdp = fd; + return (ISC_R_SUCCESS); +} + +static 
isc_result_t +route_connect_direct(isc_nmsocket_t *sock) { + isc__networker_t *worker = NULL; + isc_result_t result = ISC_R_UNSET; + int r; + + REQUIRE(isc__nm_in_netthread()); + REQUIRE(sock->tid == isc_nm_tid()); + + worker = &sock->mgr->workers[isc_nm_tid()]; + + atomic_store(&sock->connecting, true); + + r = uv_udp_init(&worker->loop, &sock->uv_handle.udp); + RUNTIME_CHECK(r == 0); + uv_handle_set_data(&sock->uv_handle.handle, sock); + + r = uv_timer_init(&worker->loop, &sock->timer); + RUNTIME_CHECK(r == 0); + uv_handle_set_data((uv_handle_t *)&sock->timer, sock); + + if (isc__nm_closing(sock)) { + result = ISC_R_SHUTTINGDOWN; + goto error; + } + + r = uv_udp_open(&sock->uv_handle.udp, sock->fd); + if (r != 0) { + goto done; + } + + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + + atomic_store(&sock->connecting, false); + atomic_store(&sock->connected, true); + +done: + result = isc__nm_uverr2result(r); +error: + + LOCK(&sock->lock); + sock->result = result; + SIGNAL(&sock->cond); + if (!atomic_load(&sock->active)) { + WAIT(&sock->scond, &sock->lock); + } + INSIST(atomic_load(&sock->active)); + UNLOCK(&sock->lock); + + return (result); +} + +/* + * Asynchronous 'udpconnect' call handler: open a new UDP socket and + * call the 'open' callback with a handle. 
+ */ +void +isc__nm_async_routeconnect(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_routeconnect_t *ievent = + (isc__netievent_routeconnect_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; + isc__nm_uvreq_t *req = ievent->req; + isc_result_t result; + + UNUSED(worker); + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_udpsocket); + REQUIRE(sock->parent == NULL); + REQUIRE(sock->tid == isc_nm_tid()); + + result = route_connect_direct(sock); + if (result != ISC_R_SUCCESS) { + atomic_store(&sock->active, false); + isc__nm_udp_close(sock); + isc__nm_connectcb(sock, req, result, true); + } else { + /* + * The callback has to be called after the socket has been + * initialized + */ + isc__nm_connectcb(sock, req, ISC_R_SUCCESS, true); + } + + /* + * The sock is now attached to the handle. + */ + isc__nmsocket_detach(&sock); +} +#endif /* USE_ROUTE_SOCKET */ + +isc_result_t +isc_nm_routeconnect(isc_nm_t *mgr, isc_nm_cb_t cb, void *cbarg, + size_t extrahandlesize) { +#ifdef USE_ROUTE_SOCKET + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *sock = NULL; + isc__netievent_udpconnect_t *event = NULL; + isc__nm_uvreq_t *req = NULL; + + REQUIRE(VALID_NM(mgr)); + + sock = isc_mem_get(mgr->mctx, sizeof(*sock)); + isc__nmsocket_init(sock, mgr, isc_nm_udpsocket, NULL); + + sock->connect_cb = cb; + sock->connect_cbarg = cbarg; + sock->extrahandlesize = extrahandlesize; + sock->result = ISC_R_UNSET; + atomic_init(&sock->client, true); + sock->route_sock = true; + + req = isc__nm_uvreq_get(mgr, sock); + req->cb.connect = cb; + req->cbarg = cbarg; + req->handle = isc__nmhandle_get(sock, NULL, NULL); + + result = route_socket(&sock->fd); + if (result != ISC_R_SUCCESS) { + if (isc__nm_in_netthread()) { + sock->tid = isc_nm_tid(); + } + isc__nmsocket_clearcb(sock); + isc__nm_connectcb(sock, req, result, true); + atomic_store(&sock->closed, true); + isc__nmsocket_detach(&sock); + return (result); + } + + event = 
isc__nm_get_netievent_routeconnect(mgr, sock, req); + + if (isc__nm_in_netthread()) { + atomic_store(&sock->active, true); + sock->tid = isc_nm_tid(); + isc__nm_async_routeconnect(&mgr->workers[sock->tid], + (isc__netievent_t *)event); + isc__nm_put_netievent_routeconnect(mgr, event); + } else { + atomic_init(&sock->active, false); + sock->tid = 0; + isc__nm_enqueue_ievent(&mgr->workers[sock->tid], + (isc__netievent_t *)event); + } + LOCK(&sock->lock); + while (sock->result == ISC_R_UNSET) { + WAIT(&sock->cond, &sock->lock); + } + atomic_store(&sock->active, true); + BROADCAST(&sock->scond); + UNLOCK(&sock->lock); + + return (sock->result); +#else /* USE_ROUTE_SOCKET */ + UNUSED(mgr); + UNUSED(cb); + UNUSED(cbarg); + UNUSED(extrahandlesize); + return (ISC_R_NOTIMPLEMENTED); +#endif /* USE_ROUTE_SOCKET */ +} + /* * Asynchronous 'udplisten' call handler: start listening on a UDP socket. */ @@ -338,8 +555,8 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, isc__nm_uvreq_t *req = NULL; uint32_t maxudp; bool free_buf; - isc_sockaddr_t sockaddr; isc_result_t result; + isc_sockaddr_t sockaddr, *sa = NULL; REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->tid == isc_nm_tid()); @@ -398,10 +615,13 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, goto free; } - result = isc_sockaddr_fromsockaddr(&sockaddr, addr); - RUNTIME_CHECK(result == ISC_R_SUCCESS); + if (!sock->route_sock) { + result = isc_sockaddr_fromsockaddr(&sockaddr, addr); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + sa = &sockaddr; + } - req = isc__nm_get_read_req(sock, &sockaddr); + req = isc__nm_get_read_req(sock, sa); /* * The callback will be called synchronously, because result is diff --git a/lib/isc/socket.c b/lib/isc/socket.c deleted file mode 100644 index 1f3b107bf9..0000000000 --- a/lib/isc/socket.c +++ /dev/null @@ -1,5445 +0,0 @@ -/* - * Copyright (C) Internet Systems Consortium, Inc. 
("ISC") - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, you can obtain one at https://mozilla.org/MPL/2.0/. - * - * See the COPYRIGHT file distributed with this work for additional - * information regarding copyright ownership. - */ - -/*! \file */ - -#include -#include -#include -#include -#include -#include -#if defined(HAVE_SYS_SYSCTL_H) && !defined(__linux__) -#include -#endif /* if defined(HAVE_SYS_SYSCTL_H) && !defined(__linux__) */ -#include -#include - -#if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) -#include -#include -#endif /* if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) \ - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_KQUEUE -#include -#endif /* ifdef HAVE_KQUEUE */ -#ifdef HAVE_EPOLL_CREATE1 -#include -#endif /* ifdef HAVE_EPOLL_CREATE1 */ -#if defined(HAVE_SYS_DEVPOLL_H) -#include -#elif defined(HAVE_DEVPOLL_H) -#include -#endif /* if defined(HAVE_SYS_DEVPOLL_H) */ - -#include - -#include "errno2result.h" -#include "socket_p.h" - -#ifdef ENABLE_TCP_FASTOPEN -#include -#endif /* ifdef ENABLE_TCP_FASTOPEN */ - -#ifdef HAVE_JSON_C -#include -#endif /* HAVE_JSON_C */ - -#ifdef HAVE_LIBXML2 -#include -#define ISC_XMLCHAR (const xmlChar *) -#endif /* HAVE_LIBXML2 */ - -/*% - * Choose the most preferable multiplex method. 
- */ -#if defined(HAVE_KQUEUE) -#define USE_KQUEUE -#elif defined(HAVE_EPOLL_CREATE1) -#define USE_EPOLL -#elif defined(HAVE_SYS_DEVPOLL_H) || defined(HAVE_DEVPOLL_H) -#define USE_DEVPOLL -typedef struct { - unsigned int want_read : 1, want_write : 1; -} pollinfo_t; -#else /* if defined(HAVE_KQUEUE) */ -#define USE_SELECT -#endif /* HAVE_KQUEUE */ - -/* - * Set by the -T dscp option on the command line. If set to a value - * other than -1, we check to make sure DSCP values match it, and - * assert if not. - */ -int isc_dscp_check_value = -1; - -/*% - * Maximum number of allowable open sockets. This is also the maximum - * allowable socket file descriptor. - * - * Care should be taken before modifying this value for select(): - * The API standard doesn't ensure select() accept more than (the system default - * of) FD_SETSIZE descriptors, and the default size should in fact be fine in - * the vast majority of cases. This constant should therefore be increased only - * when absolutely necessary and possible, i.e., the server is exhausting all - * available file descriptors (up to FD_SETSIZE) and the select() function - * and FD_xxx macros support larger values than FD_SETSIZE (which may not - * always by true, but we keep using some of them to ensure as much - * portability as possible). Note also that overall server performance - * may be rather worsened with a larger value of this constant due to - * inherent scalability problems of select(). - * - * As a special note, this value shouldn't have to be touched if - * this is a build for an authoritative only DNS server. - */ -#ifndef ISC_SOCKET_MAXSOCKETS -#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) -#ifdef TUNE_LARGE -#define ISC_SOCKET_MAXSOCKETS 21000 -#else /* ifdef TUNE_LARGE */ -#define ISC_SOCKET_MAXSOCKETS 4096 -#endif /* TUNE_LARGE */ -#elif defined(USE_SELECT) -#define ISC_SOCKET_MAXSOCKETS FD_SETSIZE -#endif /* USE_KQUEUE... 
*/ -#endif /* ISC_SOCKET_MAXSOCKETS */ - -#ifdef USE_SELECT -/*% - * Mac OS X needs a special definition to support larger values in select(). - * We always define this because a larger value can be specified run-time. - */ -#ifdef __APPLE__ -#define _DARWIN_UNLIMITED_SELECT -#endif /* __APPLE__ */ -#endif /* USE_SELECT */ - -#ifdef ISC_SOCKET_USE_POLLWATCH -/*% - * If this macro is defined, enable workaround for a Solaris /dev/poll kernel - * bug: DP_POLL ioctl could keep sleeping even if socket I/O is possible for - * some of the specified FD. The idea is based on the observation that it's - * likely for a busy server to keep receiving packets. It specifically works - * as follows: the socket watcher is first initialized with the state of - * "poll_idle". While it's in the idle state it keeps sleeping until a socket - * event occurs. When it wakes up for a socket I/O event, it moves to the - * poll_active state, and sets the poll timeout to a short period - * (ISC_SOCKET_POLLWATCH_TIMEOUT msec). If timeout occurs in this state, the - * watcher goes to the poll_checking state with the same timeout period. - * In this state, the watcher tries to detect whether this is a break - * during intermittent events or the kernel bug is triggered. If the next - * polling reports an event within the short period, the previous timeout is - * likely to be a kernel bug, and so the watcher goes back to the active state. - * Otherwise, it moves to the idle state again. - * - * It's not clear whether this is a thread-related bug, but since we've only - * seen this with threads, this workaround is used only when enabling threads. - */ - -typedef enum { poll_idle, poll_active, poll_checking } pollstate_t; - -#ifndef ISC_SOCKET_POLLWATCH_TIMEOUT -#define ISC_SOCKET_POLLWATCH_TIMEOUT 10 -#endif /* ISC_SOCKET_POLLWATCH_TIMEOUT */ -#endif /* ISC_SOCKET_USE_POLLWATCH */ - -/*% - * Per-FD lock buckets, we shuffle them around a bit as FDs come in herds. 
- */ -#define FDLOCK_BITS 10 -#define FDLOCK_COUNT (1 << FDLOCK_BITS) -#define FDLOCK_ID(fd) \ - (((fd) % (FDLOCK_COUNT) >> (FDLOCK_BITS / 2)) | \ - (((fd) << (FDLOCK_BITS / 2)) % (FDLOCK_COUNT))) - -/*% - * Maximum number of events communicated with the kernel. There should normally - * be no need for having a large number. - */ -#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) -#ifndef ISC_SOCKET_MAXEVENTS -#ifdef TUNE_LARGE -#define ISC_SOCKET_MAXEVENTS 2048 -#else /* ifdef TUNE_LARGE */ -#define ISC_SOCKET_MAXEVENTS 64 -#endif /* TUNE_LARGE */ -#endif /* ifndef ISC_SOCKET_MAXEVENTS */ -#endif /* if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) \ - * */ - -/*% - * Some systems define the socket length argument as an int, some as size_t, - * some as socklen_t. This is here so it can be easily changed if needed. - */ -#ifndef socklen_t -#define socklen_t unsigned int -#endif /* ifndef socklen_t */ - -/*% - * Define what the possible "soft" errors can be. These are non-fatal returns - * of various network related functions, like recv() and so on. - * - * For some reason, BSDI (and perhaps others) will sometimes return <0 - * from recv() but will have errno==0. This is broken, but we have to - * work around it here. - */ -#define SOFT_ERROR(e) \ - ((e) == EAGAIN || (e) == EWOULDBLOCK || (e) == ENOBUFS || \ - (e) == EINTR || (e) == 0) - -#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x) - -/*!< - * DLVL(90) -- Function entry/exit and other tracing. - * DLVL(70) -- Socket "correctness" -- including returning of events, etc. - * DLVL(60) -- Socket data send/receive - * DLVL(50) -- Event tracing, including receiving/sending completion events. - * DLVL(20) -- Socket creation/destruction. 
- */ -#define TRACE_LEVEL 90 -#define CORRECTNESS_LEVEL 70 -#define IOEVENT_LEVEL 60 -#define EVENT_LEVEL 50 -#define CREATION_LEVEL 20 - -#define TRACE DLVL(TRACE_LEVEL) -#define CORRECTNESS DLVL(CORRECTNESS_LEVEL) -#define IOEVENT DLVL(IOEVENT_LEVEL) -#define EVENT DLVL(EVENT_LEVEL) -#define CREATION DLVL(CREATION_LEVEL) - -typedef isc_event_t intev_t; - -#define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o') -#define VALID_SOCKET(s) ISC_MAGIC_VALID(s, SOCKET_MAGIC) - -/*! - * IPv6 control information. If the socket is an IPv6 socket we want - * to collect the destination address and interface so the client can - * set them on outgoing packets. - */ -#ifndef USE_CMSG -#define USE_CMSG 1 -#endif /* ifndef USE_CMSG */ - -/*% - * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have - * a setsockopt() like interface to request timestamps, and if the OS - * doesn't do it for us, call gettimeofday() on every UDP receive? - */ -#ifdef SO_TIMESTAMP -#ifndef USE_CMSG -#define USE_CMSG 1 -#endif /* ifndef USE_CMSG */ -#endif /* ifdef SO_TIMESTAMP */ - -#if defined(SO_RCVBUF) && defined(ISC_RECV_BUFFER_SIZE) -#define SET_RCVBUF -#endif - -#if defined(SO_SNDBUF) && defined(ISC_SEND_BUFFER_SIZE) -#define SET_SNDBUF -#endif - -/*% - * Instead of calculating the cmsgbuf lengths every time we take - * a rule of thumb approach - sizes are taken from x86_64 linux, - * multiplied by 2, everything should fit. Those sizes are not - * large enough to cause any concern. 
- */ -#if defined(USE_CMSG) -#define CMSG_SP_IN6PKT 40 -#else /* if defined(USE_CMSG) */ -#define CMSG_SP_IN6PKT 0 -#endif /* if defined(USE_CMSG) */ - -#if defined(USE_CMSG) && defined(SO_TIMESTAMP) -#define CMSG_SP_TIMESTAMP 32 -#else /* if defined(USE_CMSG) && defined(SO_TIMESTAMP) */ -#define CMSG_SP_TIMESTAMP 0 -#endif /* if defined(USE_CMSG) && defined(SO_TIMESTAMP) */ - -#if defined(USE_CMSG) && (defined(IPV6_TCLASS) || defined(IP_TOS)) -#define CMSG_SP_TCTOS 24 -#else /* if defined(USE_CMSG) && (defined(IPV6_TCLASS) || defined(IP_TOS)) */ -#define CMSG_SP_TCTOS 0 -#endif /* if defined(USE_CMSG) && (defined(IPV6_TCLASS) || defined(IP_TOS)) */ - -#define CMSG_SP_INT 24 - -/* Align cmsg buffers to be safe on SPARC etc. */ -#define RECVCMSGBUFLEN \ - ISC_ALIGN(2 * (CMSG_SP_IN6PKT + CMSG_SP_TIMESTAMP + CMSG_SP_TCTOS) + \ - 1, \ - sizeof(void *)) -#define SENDCMSGBUFLEN \ - ISC_ALIGN(2 * (CMSG_SP_IN6PKT + CMSG_SP_INT + CMSG_SP_TCTOS) + 1, \ - sizeof(void *)) - -/*% - * The number of times a send operation is repeated if the result is EINTR. - */ -#define NRETRIES 10 - -typedef struct isc__socketthread isc__socketthread_t; - -#define NEWCONNSOCK(ev) ((ev)->newsocket) - -struct isc_socket { - /* Not locked. */ - unsigned int magic; - isc_socketmgr_t *manager; - isc_mutex_t lock; - isc_sockettype_t type; - const isc_statscounter_t *statsindex; - isc_refcount_t references; - - /* Locked by socket lock. 
*/ - ISC_LINK(isc_socket_t) link; - int fd; - int pf; - int threadid; - char name[16]; - void *tag; - - ISC_LIST(isc_socketevent_t) send_list; - ISC_LIST(isc_socketevent_t) recv_list; - ISC_LIST(isc_socket_newconnev_t) accept_list; - ISC_LIST(isc_socket_connev_t) connect_list; - - isc_sockaddr_t peer_address; /* remote address */ - - unsigned int listener : 1, /* listener socket */ - connected : 1, connecting : 1, /* connect pending - * */ - bound : 1, /* bound to local addr */ - active : 1, /* currently active */ - pktdscp : 1; /* per packet dscp */ - -#ifdef ISC_PLATFORM_RECVOVERFLOW - unsigned char overflow; /* used for MSG_TRUNC fake */ -#endif /* ifdef ISC_PLATFORM_RECVOVERFLOW */ - - unsigned int dscp; -}; - -#define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g') -#define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC) - -struct isc_socketmgr { - /* Not locked. */ - unsigned int magic; - isc_mem_t *mctx; - isc_mutex_t lock; - isc_stats_t *stats; - int nthreads; - isc__socketthread_t *threads; - unsigned int maxsocks; - /* Locked by manager lock. */ - ISC_LIST(isc_socket_t) socklist; - int reserved; /* unlocked */ - isc_condition_t shutdown_ok; - size_t maxudp; -}; - -struct isc__socketthread { - isc_socketmgr_t *manager; - int threadid; - isc_thread_t thread; - int pipe_fds[2]; - isc_mutex_t *fdlock; - /* Locked by fdlock. 
*/ - isc_socket_t **fds; - int *fdstate; -#ifdef USE_KQUEUE - int kqueue_fd; - int nevents; - struct kevent *events; -#endif /* USE_KQUEUE */ -#ifdef USE_EPOLL - int epoll_fd; - int nevents; - struct epoll_event *events; - uint32_t *epoll_events; -#endif /* USE_EPOLL */ -#ifdef USE_DEVPOLL - int devpoll_fd; - isc_resourcevalue_t open_max; - unsigned int calls; - int nevents; - struct pollfd *events; - pollinfo_t *fdpollinfo; -#endif /* USE_DEVPOLL */ -#ifdef USE_SELECT - int fd_bufsize; - fd_set *read_fds; - fd_set *read_fds_copy; - fd_set *write_fds; - fd_set *write_fds_copy; - int maxfd; -#endif /* USE_SELECT */ -}; - -#define CLOSED 0 /* this one must be zero */ -#define MANAGED 1 -#define CLOSE_PENDING 2 - -/* - * send() and recv() iovec counts - */ -#define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER) -#ifdef ISC_PLATFORM_RECVOVERFLOW -#define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER + 1) -#else /* ifdef ISC_PLATFORM_RECVOVERFLOW */ -#define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER) -#endif /* ifdef ISC_PLATFORM_RECVOVERFLOW */ - -static isc_result_t -socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, - isc_socket_t **socketp); -static void -send_recvdone_event(isc_socket_t *, isc_socketevent_t **); -static void -send_senddone_event(isc_socket_t *, isc_socketevent_t **); -static void -send_connectdone_event(isc_socket_t *, isc_socket_connev_t **); -static void -free_socket(isc_socket_t **); -static isc_result_t -allocate_socket(isc_socketmgr_t *, isc_sockettype_t, isc_socket_t **); -static void -destroy(isc_socket_t **); -static void -internal_accept(isc_socket_t *); -static void -internal_connect(isc_socket_t *); -static void -internal_recv(isc_socket_t *); -static void -internal_send(isc_socket_t *); -static void -process_cmsg(isc_socket_t *, struct msghdr *, isc_socketevent_t *); -static void -build_msghdr_send(isc_socket_t *, char *, isc_socketevent_t *, struct msghdr *, - struct iovec *, size_t *); -static 
void -build_msghdr_recv(isc_socket_t *, char *, isc_socketevent_t *, struct msghdr *, - struct iovec *, size_t *); -static bool -process_ctlfd(isc__socketthread_t *thread); -static void -setdscp(isc_socket_t *sock, isc_dscp_t dscp); - -#define SELECT_POKE_SHUTDOWN (-1) -#define SELECT_POKE_NOTHING (-2) -#define SELECT_POKE_READ (-3) -#define SELECT_POKE_ACCEPT (-3) /*%< Same as _READ */ -#define SELECT_POKE_WRITE (-4) -#define SELECT_POKE_CONNECT (-4) /*%< Same as _WRITE */ -#define SELECT_POKE_CLOSE (-5) - -/*% - * Shortcut index arrays to get access to statistics counters. - */ -enum { - STATID_OPEN = 0, - STATID_OPENFAIL = 1, - STATID_CLOSE = 2, - STATID_BINDFAIL = 3, - STATID_CONNECTFAIL = 4, - STATID_CONNECT = 5, - STATID_ACCEPTFAIL = 6, - STATID_ACCEPT = 7, - STATID_SENDFAIL = 8, - STATID_RECVFAIL = 9, - STATID_ACTIVE = 10 -}; -static const isc_statscounter_t udp4statsindex[] = { - isc_sockstatscounter_udp4open, - isc_sockstatscounter_udp4openfail, - isc_sockstatscounter_udp4close, - isc_sockstatscounter_udp4bindfail, - isc_sockstatscounter_udp4connectfail, - isc_sockstatscounter_udp4connect, - -1, - -1, - isc_sockstatscounter_udp4sendfail, - isc_sockstatscounter_udp4recvfail, - isc_sockstatscounter_udp4active -}; -static const isc_statscounter_t udp6statsindex[] = { - isc_sockstatscounter_udp6open, - isc_sockstatscounter_udp6openfail, - isc_sockstatscounter_udp6close, - isc_sockstatscounter_udp6bindfail, - isc_sockstatscounter_udp6connectfail, - isc_sockstatscounter_udp6connect, - -1, - -1, - isc_sockstatscounter_udp6sendfail, - isc_sockstatscounter_udp6recvfail, - isc_sockstatscounter_udp6active -}; -static const isc_statscounter_t tcp4statsindex[] = { - isc_sockstatscounter_tcp4open, isc_sockstatscounter_tcp4openfail, - isc_sockstatscounter_tcp4close, isc_sockstatscounter_tcp4bindfail, - isc_sockstatscounter_tcp4connectfail, isc_sockstatscounter_tcp4connect, - isc_sockstatscounter_tcp4acceptfail, isc_sockstatscounter_tcp4accept, - 
isc_sockstatscounter_tcp4sendfail, isc_sockstatscounter_tcp4recvfail, - isc_sockstatscounter_tcp4active -}; -static const isc_statscounter_t tcp6statsindex[] = { - isc_sockstatscounter_tcp6open, isc_sockstatscounter_tcp6openfail, - isc_sockstatscounter_tcp6close, isc_sockstatscounter_tcp6bindfail, - isc_sockstatscounter_tcp6connectfail, isc_sockstatscounter_tcp6connect, - isc_sockstatscounter_tcp6acceptfail, isc_sockstatscounter_tcp6accept, - isc_sockstatscounter_tcp6sendfail, isc_sockstatscounter_tcp6recvfail, - isc_sockstatscounter_tcp6active -}; -static const isc_statscounter_t unixstatsindex[] = { - isc_sockstatscounter_unixopen, isc_sockstatscounter_unixopenfail, - isc_sockstatscounter_unixclose, isc_sockstatscounter_unixbindfail, - isc_sockstatscounter_unixconnectfail, isc_sockstatscounter_unixconnect, - isc_sockstatscounter_unixacceptfail, isc_sockstatscounter_unixaccept, - isc_sockstatscounter_unixsendfail, isc_sockstatscounter_unixrecvfail, - isc_sockstatscounter_unixactive -}; -static const isc_statscounter_t rawstatsindex[] = { - isc_sockstatscounter_rawopen, - isc_sockstatscounter_rawopenfail, - isc_sockstatscounter_rawclose, - -1, - -1, - -1, - -1, - -1, - -1, - isc_sockstatscounter_rawrecvfail, - isc_sockstatscounter_rawactive -}; - -static int -gen_threadid(isc_socket_t *sock); - -static int -gen_threadid(isc_socket_t *sock) { - return (sock->fd % sock->manager->nthreads); -} - -static void -manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category, - isc_logmodule_t *module, int level, const char *fmt, ...) - ISC_FORMAT_PRINTF(5, 6); -static void -manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category, - isc_logmodule_t *module, int level, const char *fmt, ...) 
{ - char msgbuf[2048]; - va_list ap; - - if (!isc_log_wouldlog(isc_lctx, level)) { - return; - } - - va_start(ap, fmt); - vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); - va_end(ap); - - isc_log_write(isc_lctx, category, module, level, "sockmgr %p: %s", - sockmgr, msgbuf); -} - -static void -thread_log(isc__socketthread_t *thread, isc_logcategory_t *category, - isc_logmodule_t *module, int level, const char *fmt, ...) - ISC_FORMAT_PRINTF(5, 6); -static void -thread_log(isc__socketthread_t *thread, isc_logcategory_t *category, - isc_logmodule_t *module, int level, const char *fmt, ...) { - char msgbuf[2048]; - va_list ap; - - if (!isc_log_wouldlog(isc_lctx, level)) { - return; - } - - va_start(ap, fmt); - vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); - va_end(ap); - - isc_log_write(isc_lctx, category, module, level, - "sockmgr %p thread %d: %s", thread->manager, - thread->threadid, msgbuf); -} - -static void -socket_log(isc_socket_t *sock, const isc_sockaddr_t *address, - isc_logcategory_t *category, isc_logmodule_t *module, int level, - const char *fmt, ...) ISC_FORMAT_PRINTF(6, 7); -static void -socket_log(isc_socket_t *sock, const isc_sockaddr_t *address, - isc_logcategory_t *category, isc_logmodule_t *module, int level, - const char *fmt, ...) { - char msgbuf[2048]; - char peerbuf[ISC_SOCKADDR_FORMATSIZE]; - va_list ap; - - if (!isc_log_wouldlog(isc_lctx, level)) { - return; - } - - va_start(ap, fmt); - vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); - va_end(ap); - - if (address == NULL) { - isc_log_write(isc_lctx, category, module, level, - "socket %p: %s", sock, msgbuf); - } else { - isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); - isc_log_write(isc_lctx, category, module, level, - "socket %p %s: %s", sock, peerbuf, msgbuf); - } -} - -/*% - * Increment socket-related statistics counters. 
- */ -static inline void -inc_stats(isc_stats_t *stats, isc_statscounter_t counterid) { - REQUIRE(counterid != -1); - - if (stats != NULL) { - isc_stats_increment(stats, counterid); - } -} - -/*% - * Decrement socket-related statistics counters. - */ -static inline void -dec_stats(isc_stats_t *stats, isc_statscounter_t counterid) { - REQUIRE(counterid != -1); - - if (stats != NULL) { - isc_stats_decrement(stats, counterid); - } -} - -static inline isc_result_t -watch_fd(isc__socketthread_t *thread, int fd, int msg) { - isc_result_t result = ISC_R_SUCCESS; - -#ifdef USE_KQUEUE - struct kevent evchange; - - memset(&evchange, 0, sizeof(evchange)); - if (msg == SELECT_POKE_READ) { - evchange.filter = EVFILT_READ; - } else { - evchange.filter = EVFILT_WRITE; - } - evchange.flags = EV_ADD; - evchange.ident = fd; - if (kevent(thread->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) { - result = isc__errno2result(errno); - } - - return (result); -#elif defined(USE_EPOLL) - struct epoll_event event; - uint32_t oldevents; - int ret; - int op; - - oldevents = thread->epoll_events[fd]; - if (msg == SELECT_POKE_READ) { - thread->epoll_events[fd] |= EPOLLIN; - } else { - thread->epoll_events[fd] |= EPOLLOUT; - } - - event.events = thread->epoll_events[fd]; - memset(&event.data, 0, sizeof(event.data)); - event.data.fd = fd; - - op = (oldevents == 0U) ? 
EPOLL_CTL_ADD : EPOLL_CTL_MOD; - if (thread->fds[fd] != NULL) { - LOCK(&thread->fds[fd]->lock); - } - ret = epoll_ctl(thread->epoll_fd, op, fd, &event); - if (thread->fds[fd] != NULL) { - UNLOCK(&thread->fds[fd]->lock); - } - if (ret == -1) { - if (errno == EEXIST) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "epoll_ctl(ADD/MOD) returned " - "EEXIST for fd %d", - fd); - } - result = isc__errno2result(errno); - } - - return (result); -#elif defined(USE_DEVPOLL) - struct pollfd pfd; - - memset(&pfd, 0, sizeof(pfd)); - if (msg == SELECT_POKE_READ) { - pfd.events = POLLIN; - } else { - pfd.events = POLLOUT; - } - pfd.fd = fd; - pfd.revents = 0; - if (write(thread->devpoll_fd, &pfd, sizeof(pfd)) == -1) { - result = isc__errno2result(errno); - } else { - if (msg == SELECT_POKE_READ) { - thread->fdpollinfo[fd].want_read = 1; - } else { - thread->fdpollinfo[fd].want_write = 1; - } - } - - return (result); -#elif defined(USE_SELECT) - LOCK(&thread->manager->lock); - if (msg == SELECT_POKE_READ) { - FD_SET(fd, thread->read_fds); - } - if (msg == SELECT_POKE_WRITE) { - FD_SET(fd, thread->write_fds); - } - UNLOCK(&thread->manager->lock); - - return (result); -#endif /* ifdef USE_KQUEUE */ -} - -static inline isc_result_t -unwatch_fd(isc__socketthread_t *thread, int fd, int msg) { - isc_result_t result = ISC_R_SUCCESS; - -#ifdef USE_KQUEUE - struct kevent evchange; - - memset(&evchange, 0, sizeof(evchange)); - if (msg == SELECT_POKE_READ) { - evchange.filter = EVFILT_READ; - } else { - evchange.filter = EVFILT_WRITE; - } - evchange.flags = EV_DELETE; - evchange.ident = fd; - if (kevent(thread->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) { - result = isc__errno2result(errno); - } - - return (result); -#elif defined(USE_EPOLL) - struct epoll_event event; - int ret; - int op; - - if (msg == SELECT_POKE_READ) { - thread->epoll_events[fd] &= ~(EPOLLIN); - } else { - thread->epoll_events[fd] &= ~(EPOLLOUT); - } - - event.events = thread->epoll_events[fd]; - memset(&event.data, 0, 
sizeof(event.data)); - event.data.fd = fd; - - op = (event.events == 0U) ? EPOLL_CTL_DEL : EPOLL_CTL_MOD; - ret = epoll_ctl(thread->epoll_fd, op, fd, &event); - if (ret == -1 && errno != ENOENT) { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "epoll_ctl(DEL), %d: %s", - fd, strbuf); - result = ISC_R_UNEXPECTED; - } - return (result); -#elif defined(USE_DEVPOLL) - struct pollfd pfds[2]; - size_t writelen = sizeof(pfds[0]); - - memset(pfds, 0, sizeof(pfds)); - pfds[0].events = POLLREMOVE; - pfds[0].fd = fd; - - /* - * Canceling read or write polling via /dev/poll is tricky. Since it - * only provides a way of canceling per FD, we may need to re-poll the - * socket for the other operation. - */ - if (msg == SELECT_POKE_READ && thread->fdpollinfo[fd].want_write == 1) { - pfds[1].events = POLLOUT; - pfds[1].fd = fd; - writelen += sizeof(pfds[1]); - } - if (msg == SELECT_POKE_WRITE && thread->fdpollinfo[fd].want_read == 1) { - pfds[1].events = POLLIN; - pfds[1].fd = fd; - writelen += sizeof(pfds[1]); - } - - if (write(thread->devpoll_fd, pfds, writelen) == -1) { - result = isc__errno2result(errno); - } else { - if (msg == SELECT_POKE_READ) { - thread->fdpollinfo[fd].want_read = 0; - } else { - thread->fdpollinfo[fd].want_write = 0; - } - } - - return (result); -#elif defined(USE_SELECT) - LOCK(&thread->manager->lock); - if (msg == SELECT_POKE_READ) { - FD_CLR(fd, thread->read_fds); - } else if (msg == SELECT_POKE_WRITE) { - FD_CLR(fd, thread->write_fds); - } - UNLOCK(&thread->manager->lock); - - return (result); -#endif /* ifdef USE_KQUEUE */ -} - -/* - * A poke message was received, perform a proper watch/unwatch - * on a fd provided - */ -static void -wakeup_socket(isc__socketthread_t *thread, int fd, int msg) { - isc_result_t result; - int lockid = FDLOCK_ID(fd); - - /* - * This is a wakeup on a socket. 
If the socket is not in the - * process of being closed, start watching it for either reads - * or writes. - */ - - INSIST(fd >= 0 && fd < (int)thread->manager->maxsocks); - - if (msg == SELECT_POKE_CLOSE) { - LOCK(&thread->fdlock[lockid]); - INSIST(thread->fdstate[fd] == CLOSE_PENDING); - thread->fdstate[fd] = CLOSED; - (void)unwatch_fd(thread, fd, SELECT_POKE_READ); - (void)unwatch_fd(thread, fd, SELECT_POKE_WRITE); - (void)close(fd); - UNLOCK(&thread->fdlock[lockid]); - return; - } - - LOCK(&thread->fdlock[lockid]); - if (thread->fdstate[fd] == CLOSE_PENDING) { - /* - * We accept (and ignore) any error from unwatch_fd() as we are - * closing the socket, hoping it doesn't leave dangling state in - * the kernel. - * Note that unwatch_fd() must be called after releasing the - * fdlock; otherwise it could cause deadlock due to a lock order - * reversal. - */ - (void)unwatch_fd(thread, fd, SELECT_POKE_READ); - (void)unwatch_fd(thread, fd, SELECT_POKE_WRITE); - UNLOCK(&thread->fdlock[lockid]); - return; - } - if (thread->fdstate[fd] != MANAGED) { - UNLOCK(&thread->fdlock[lockid]); - return; - } - - /* - * Set requested bit. - */ - result = watch_fd(thread, fd, msg); - if (result != ISC_R_SUCCESS) { - /* - * XXXJT: what should we do? Ignoring the failure of watching - * a socket will make the application dysfunctional, but there - * seems to be no reasonable recovery process. - */ - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "failed to start watching FD (%d): %s", fd, - isc_result_totext(result)); - } - UNLOCK(&thread->fdlock[lockid]); -} - -/* - * Poke the select loop when there is something for us to do. - * The write is required (by POSIX) to complete. That is, we - * will not get partial writes. 
- */ -static void -select_poke(isc_socketmgr_t *mgr, int threadid, int fd, int msg) { - int cc; - int buf[2]; - char strbuf[ISC_STRERRORSIZE]; - - buf[0] = fd; - buf[1] = msg; - - do { - cc = write(mgr->threads[threadid].pipe_fds[1], buf, - sizeof(buf)); -#ifdef ENOSR - /* - * Treat ENOSR as EAGAIN but loop slowly as it is - * unlikely to clear fast. - */ - if (cc < 0 && errno == ENOSR) { - sleep(1); - errno = EAGAIN; - } -#endif /* ifdef ENOSR */ - } while (cc < 0 && SOFT_ERROR(errno)); - - if (cc < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, - "write() failed during watcher poke: %s", strbuf); - } - - INSIST(cc == sizeof(buf)); -} - -/* - * Read a message on the internal fd. - */ -static void -select_readmsg(isc__socketthread_t *thread, int *fd, int *msg) { - int buf[2]; - int cc; - char strbuf[ISC_STRERRORSIZE]; - - cc = read(thread->pipe_fds[0], buf, sizeof(buf)); - if (cc < 0) { - *msg = SELECT_POKE_NOTHING; - *fd = -1; /* Silence compiler. */ - if (SOFT_ERROR(errno)) { - return; - } - - strerror_r(errno, strbuf, sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, - "read() failed during watcher poke: %s", strbuf); - } - INSIST(cc == sizeof(buf)); - - *fd = buf[0]; - *msg = buf[1]; -} - -/* - * Make a fd non-blocking. 
- */ -static isc_result_t -make_nonblock(int fd) { - int ret; - char strbuf[ISC_STRERRORSIZE]; -#ifdef USE_FIONBIO_IOCTL - int on = 1; -#else /* ifdef USE_FIONBIO_IOCTL */ - int flags; -#endif /* ifdef USE_FIONBIO_IOCTL */ - -#ifdef USE_FIONBIO_IOCTL - ret = ioctl(fd, FIONBIO, (char *)&on); -#else /* ifdef USE_FIONBIO_IOCTL */ - flags = fcntl(fd, F_GETFL, 0); - flags |= O_NONBLOCK; - ret = fcntl(fd, F_SETFL, flags); -#endif /* ifdef USE_FIONBIO_IOCTL */ - - if (ret == -1) { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, -#ifdef USE_FIONBIO_IOCTL - "ioctl(%d, FIONBIO, &on): %s", fd, -#else /* ifdef USE_FIONBIO_IOCTL */ - "fcntl(%d, F_SETFL, %d): %s", fd, flags, -#endif /* ifdef USE_FIONBIO_IOCTL */ - strbuf); - - return (ISC_R_UNEXPECTED); - } - - return (ISC_R_SUCCESS); -} - -#ifdef USE_CMSG -/* - * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE. - * In order to ensure as much portability as possible, we provide wrapper - * functions of these macros. - * Note that cmsg_space() could run slow on OSes that do not have - * CMSG_SPACE. - */ -static inline socklen_t -cmsg_len(socklen_t len) { -#ifdef CMSG_LEN - return (CMSG_LEN(len)); -#else /* ifdef CMSG_LEN */ - socklen_t hdrlen; - - /* - * Cast NULL so that any pointer arithmetic performed by CMSG_DATA - * is correct. - */ - hdrlen = (socklen_t)CMSG_DATA(((struct cmsghdr *)NULL)); - return (hdrlen + len); -#endif /* ifdef CMSG_LEN */ -} - -static inline socklen_t -cmsg_space(socklen_t len) { -#ifdef CMSG_SPACE - return (CMSG_SPACE(len)); -#else /* ifdef CMSG_SPACE */ - struct msghdr msg; - struct cmsghdr *cmsgp; - /* - * XXX: The buffer length is an ad-hoc value, but should be enough - * in a practical sense. 
- */ - char dummybuf[sizeof(struct cmsghdr) + 1024]; - - memset(&msg, 0, sizeof(msg)); - msg.msg_control = dummybuf; - msg.msg_controllen = sizeof(dummybuf); - - cmsgp = (struct cmsghdr *)dummybuf; - cmsgp->cmsg_len = cmsg_len(len); - - cmsgp = CMSG_NXTHDR(&msg, cmsgp); - if (cmsgp != NULL) { - return ((char *)cmsgp - (char *)msg.msg_control); - } else { - return (0); - } -#endif /* ifdef CMSG_SPACE */ -} -#endif /* USE_CMSG */ - -/* - * Process control messages received on a socket. - */ -static void -process_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) { -#ifdef USE_CMSG - struct cmsghdr *cmsgp; - struct in6_pktinfo *pktinfop; -#ifdef SO_TIMESTAMP - void *timevalp; -#endif /* ifdef SO_TIMESTAMP */ -#endif /* ifdef USE_CMSG */ - - /* - * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined. - * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined. - * They are all here, outside of the CPP tests, because it is - * more consistent with the usual ISC coding style. 
- */ - UNUSED(sock); - UNUSED(msg); - UNUSED(dev); - -#ifdef MSG_TRUNC - if ((msg->msg_flags & MSG_TRUNC) != 0) { - dev->attributes |= ISC_SOCKEVENTATTR_TRUNC; - } -#endif /* ifdef MSG_TRUNC */ - -#ifdef MSG_CTRUNC - if ((msg->msg_flags & MSG_CTRUNC) != 0) { - dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC; - } -#endif /* ifdef MSG_CTRUNC */ - -#ifndef USE_CMSG - return; -#else /* ifndef USE_CMSG */ - if (msg->msg_controllen == 0U || msg->msg_control == NULL) { - return; - } - -#ifdef SO_TIMESTAMP - timevalp = NULL; -#endif /* ifdef SO_TIMESTAMP */ - pktinfop = NULL; - - cmsgp = CMSG_FIRSTHDR(msg); - while (cmsgp != NULL) { - socket_log(sock, NULL, TRACE, "processing cmsg %p", cmsgp); - - if (cmsgp->cmsg_level == IPPROTO_IPV6 && - cmsgp->cmsg_type == IPV6_PKTINFO) { - pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); - memmove(&dev->pktinfo, pktinfop, - sizeof(struct in6_pktinfo)); - dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; - socket_log(sock, NULL, TRACE, - "interface received on ifindex %u", - dev->pktinfo.ipi6_ifindex); - if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr)) { - dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST; - } - goto next; - } - -#ifdef SO_TIMESTAMP - if (cmsgp->cmsg_level == SOL_SOCKET && - cmsgp->cmsg_type == SCM_TIMESTAMP) { - struct timeval tv; - timevalp = CMSG_DATA(cmsgp); - memmove(&tv, timevalp, sizeof(tv)); - dev->timestamp.seconds = tv.tv_sec; - dev->timestamp.nanoseconds = tv.tv_usec * 1000; - dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP; - goto next; - } -#endif /* ifdef SO_TIMESTAMP */ - -#ifdef IPV6_TCLASS - if (cmsgp->cmsg_level == IPPROTO_IPV6 && - cmsgp->cmsg_type == IPV6_TCLASS) { - dev->dscp = *(int *)CMSG_DATA(cmsgp); - dev->dscp >>= 2; - dev->attributes |= ISC_SOCKEVENTATTR_DSCP; - goto next; - } -#endif /* ifdef IPV6_TCLASS */ - -#ifdef IP_TOS - if (cmsgp->cmsg_level == IPPROTO_IP && - (cmsgp->cmsg_type == IP_TOS -#ifdef IP_RECVTOS - || cmsgp->cmsg_type == IP_RECVTOS -#endif /* ifdef IP_RECVTOS */ - )) - { - dev->dscp = 
(int)*(unsigned char *)CMSG_DATA(cmsgp); - dev->dscp >>= 2; - dev->attributes |= ISC_SOCKEVENTATTR_DSCP; - goto next; - } -#endif /* ifdef IP_TOS */ - next: - cmsgp = CMSG_NXTHDR(msg, cmsgp); - } -#endif /* USE_CMSG */ -} - -/* - * Construct an iov array and attach it to the msghdr passed in. This is - * the SEND constructor, which will use the used region of the buffer - * (if using a buffer list) or will use the internal region (if a single - * buffer I/O is requested). - * - * Nothing can be NULL, and the done event must list at least one buffer - * on the buffer linked list for this function to be meaningful. - * - * If write_countp != NULL, *write_countp will hold the number of bytes - * this transaction can send. - */ -static void -build_msghdr_send(isc_socket_t *sock, char *cmsgbuf, isc_socketevent_t *dev, - struct msghdr *msg, struct iovec *iov, size_t *write_countp) { - unsigned int iovcount; - size_t write_count; - struct cmsghdr *cmsgp; - - memset(msg, 0, sizeof(*msg)); - - if (!sock->connected) { - msg->msg_name = (void *)&dev->address.type.sa; - msg->msg_namelen = dev->address.length; - } else { - msg->msg_name = NULL; - msg->msg_namelen = 0; - } - - write_count = dev->region.length - dev->n; - iov[0].iov_base = (void *)(dev->region.base + dev->n); - iov[0].iov_len = write_count; - iovcount = 1; - - msg->msg_iov = iov; - msg->msg_iovlen = iovcount; - msg->msg_control = NULL; - msg->msg_controllen = 0; - msg->msg_flags = 0; -#if defined(USE_CMSG) - - if ((sock->type == isc_sockettype_udp) && - ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0)) - { - struct in6_pktinfo *pktinfop; - - socket_log(sock, NULL, TRACE, "sendto pktinfo data, ifindex %u", - dev->pktinfo.ipi6_ifindex); - - msg->msg_control = (void *)cmsgbuf; - msg->msg_controllen = cmsg_space(sizeof(struct in6_pktinfo)); - INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); - - cmsgp = (struct cmsghdr *)cmsgbuf; - cmsgp->cmsg_level = IPPROTO_IPV6; - cmsgp->cmsg_type = IPV6_PKTINFO; - 
cmsgp->cmsg_len = cmsg_len(sizeof(struct in6_pktinfo)); - pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); - memmove(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo)); - } - -#if defined(IPV6_USE_MIN_MTU) - if ((sock->type == isc_sockettype_udp) && (sock->pf == AF_INET6) && - ((dev->attributes & ISC_SOCKEVENTATTR_USEMINMTU) != 0)) - { - int use_min_mtu = 1; /* -1, 0, 1 */ - - cmsgp = (struct cmsghdr *)(cmsgbuf + msg->msg_controllen); - msg->msg_control = (void *)cmsgbuf; - msg->msg_controllen += cmsg_space(sizeof(use_min_mtu)); - INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); - - cmsgp->cmsg_level = IPPROTO_IPV6; - cmsgp->cmsg_type = IPV6_USE_MIN_MTU; - cmsgp->cmsg_len = cmsg_len(sizeof(use_min_mtu)); - memmove(CMSG_DATA(cmsgp), &use_min_mtu, sizeof(use_min_mtu)); - } -#endif /* if defined(IPV6_USE_MIN_MTU) */ - - if (isc_dscp_check_value > -1) { - if (sock->type == isc_sockettype_udp) { - INSIST((int)dev->dscp == isc_dscp_check_value); - } else if (sock->type == isc_sockettype_tcp) { - INSIST((int)sock->dscp == isc_dscp_check_value); - } - } - -#if defined(IP_TOS) || (defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)) - if ((sock->type == isc_sockettype_udp) && - ((dev->attributes & ISC_SOCKEVENTATTR_DSCP) != 0)) - { - int dscp = (dev->dscp << 2) & 0xff; - - INSIST(dev->dscp < 0x40); - -#ifdef IP_TOS - if (sock->pf == AF_INET && sock->pktdscp) { - cmsgp = (struct cmsghdr *)(cmsgbuf + - msg->msg_controllen); - msg->msg_control = (void *)cmsgbuf; - msg->msg_controllen += cmsg_space(sizeof(dscp)); - INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); - - cmsgp->cmsg_level = IPPROTO_IP; - cmsgp->cmsg_type = IP_TOS; - cmsgp->cmsg_len = cmsg_len(sizeof(char)); - *(unsigned char *)CMSG_DATA(cmsgp) = dscp; - } else if (sock->pf == AF_INET && sock->dscp != dev->dscp) { - if (setsockopt(sock->fd, IPPROTO_IP, IP_TOS, - (void *)&dscp, sizeof(int)) < 0) { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - 
"setsockopt(%d, IP_TOS, %.02x)" - " failed: %s", - sock->fd, dscp >> 2, strbuf); - } else { - sock->dscp = dscp; - } - } -#endif /* ifdef IP_TOS */ -#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS) - if (sock->pf == AF_INET6 && sock->pktdscp) { - cmsgp = (struct cmsghdr *)(cmsgbuf + - msg->msg_controllen); - msg->msg_control = (void *)cmsgbuf; - msg->msg_controllen += cmsg_space(sizeof(dscp)); - INSIST(msg->msg_controllen <= SENDCMSGBUFLEN); - - cmsgp->cmsg_level = IPPROTO_IPV6; - cmsgp->cmsg_type = IPV6_TCLASS; - cmsgp->cmsg_len = cmsg_len(sizeof(dscp)); - memmove(CMSG_DATA(cmsgp), &dscp, sizeof(dscp)); - } else if (sock->pf == AF_INET6 && sock->dscp != dev->dscp) { - if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS, - (void *)&dscp, sizeof(int)) < 0) - { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_TCLASS, " - "%.02x) failed: %s", - sock->fd, dscp >> 2, strbuf); - } else { - sock->dscp = dscp; - } - } -#endif /* if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS) */ - if (msg->msg_controllen != 0 && - msg->msg_controllen < SENDCMSGBUFLEN) { - memset(cmsgbuf + msg->msg_controllen, 0, - SENDCMSGBUFLEN - msg->msg_controllen); - } - } -#endif /* if defined(IP_TOS) || (defined(IPPROTO_IPV6) && \ - * defined(IPV6_TCLASS)) \ - * */ -#endif /* USE_CMSG */ - - if (write_countp != NULL) { - *write_countp = write_count; - } -} - -/* - * Construct an iov array and attach it to the msghdr passed in. This is - * the RECV constructor, which will use the available region of the buffer - * (if using a buffer list) or will use the internal region (if a single - * buffer I/O is requested). - * - * Nothing can be NULL, and the done event must list at least one buffer - * on the buffer linked list for this function to be meaningful. - * - * If read_countp != NULL, *read_countp will hold the number of bytes - * this transaction can receive. 
- */ -static void -build_msghdr_recv(isc_socket_t *sock, char *cmsgbuf, isc_socketevent_t *dev, - struct msghdr *msg, struct iovec *iov, size_t *read_countp) { - unsigned int iovcount; - size_t read_count; - - memset(msg, 0, sizeof(struct msghdr)); - - if (sock->type == isc_sockettype_udp) { - memset(&dev->address, 0, sizeof(dev->address)); - msg->msg_name = (void *)&dev->address.type.sa; - msg->msg_namelen = sizeof(dev->address.type); - } else { /* TCP */ - msg->msg_name = NULL; - msg->msg_namelen = 0; - dev->address = sock->peer_address; - } - - read_count = dev->region.length - dev->n; - iov[0].iov_base = (void *)(dev->region.base + dev->n); - iov[0].iov_len = read_count; - iovcount = 1; - - /* - * If needed, set up to receive that one extra byte. - */ -#ifdef ISC_PLATFORM_RECVOVERFLOW - if (sock->type == isc_sockettype_udp) { - INSIST(iovcount < MAXSCATTERGATHER_RECV); - iov[iovcount].iov_base = (void *)(&sock->overflow); - iov[iovcount].iov_len = 1; - iovcount++; - } -#endif /* ifdef ISC_PLATFORM_RECVOVERFLOW */ - - msg->msg_iov = iov; - msg->msg_iovlen = iovcount; - -#if defined(USE_CMSG) - msg->msg_control = cmsgbuf; - msg->msg_controllen = RECVCMSGBUFLEN; -#else /* if defined(USE_CMSG) */ - msg->msg_control = NULL; - msg->msg_controllen = 0; -#endif /* USE_CMSG */ - msg->msg_flags = 0; - - if (read_countp != NULL) { - *read_countp = read_count; - } -} - -static void -set_dev_address(const isc_sockaddr_t *address, isc_socket_t *sock, - isc_socketevent_t *dev) { - if (sock->type == isc_sockettype_udp) { - if (address != NULL) { - dev->address = *address; - } else { - dev->address = sock->peer_address; - } - } else if (sock->type == isc_sockettype_tcp) { - INSIST(address == NULL); - dev->address = sock->peer_address; - } -} - -static void -destroy_socketevent(isc_event_t *event) { - isc_socketevent_t *ev = (isc_socketevent_t *)event; - - (ev->destroy)(event); -} - -static isc_socketevent_t * -allocate_socketevent(isc_mem_t *mctx, void *sender, isc_eventtype_t 
eventtype, - isc_taskaction_t action, void *arg) { - isc_socketevent_t *ev; - - ev = (isc_socketevent_t *)isc_event_allocate(mctx, sender, eventtype, - action, arg, sizeof(*ev)); - - ev->result = ISC_R_UNSET; - ISC_LINK_INIT(ev, ev_link); - ev->region.base = NULL; - ev->n = 0; - ev->offset = 0; - ev->attributes = 0; - ev->destroy = ev->ev_destroy; - ev->ev_destroy = destroy_socketevent; - ev->dscp = 0; - - return (ev); -} - -#if defined(ISC_SOCKET_DEBUG) -static void -dump_msg(struct msghdr *msg) { - unsigned int i; - - printf("MSGHDR %p\n", msg); - printf("\tname %p, namelen %ld\n", msg->msg_name, - (long)msg->msg_namelen); - printf("\tiov %p, iovlen %ld\n", msg->msg_iov, (long)msg->msg_iovlen); - for (i = 0; i < (unsigned int)msg->msg_iovlen; i++) - printf("\t\t%u\tbase %p, len %ld\n", i, - msg->msg_iov[i].iov_base, (long)msg->msg_iov[i].iov_len); - printf("\tcontrol %p, controllen %ld\n", msg->msg_control, - (long)msg->msg_controllen); -} -#endif /* if defined(ISC_SOCKET_DEBUG) */ - -#define DOIO_SUCCESS 0 /* i/o ok, event sent */ -#define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */ -#define DOIO_HARD 2 /* i/o error, event sent */ -#define DOIO_EOF 3 /* EOF, no event sent */ - -static int -doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) { - int cc; - struct iovec iov[MAXSCATTERGATHER_RECV]; - size_t read_count; - struct msghdr msghdr; - int recv_errno; - char strbuf[ISC_STRERRORSIZE]; - char cmsgbuf[RECVCMSGBUFLEN] = { 0 }; - - build_msghdr_recv(sock, cmsgbuf, dev, &msghdr, iov, &read_count); - -#if defined(ISC_SOCKET_DEBUG) - dump_msg(&msghdr); -#endif /* if defined(ISC_SOCKET_DEBUG) */ - - cc = recvmsg(sock->fd, &msghdr, 0); - recv_errno = errno; - -#if defined(ISC_SOCKET_DEBUG) - dump_msg(&msghdr); -#endif /* if defined(ISC_SOCKET_DEBUG) */ - - if (cc < 0) { - if (SOFT_ERROR(recv_errno)) { - return (DOIO_SOFT); - } - - if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { - strerror_r(recv_errno, strbuf, sizeof(strbuf)); - socket_log(sock, NULL, 
IOEVENT, - "doio_recv: recvmsg(%d) %d bytes, err %d/%s", - sock->fd, cc, recv_errno, strbuf); - } - -#define SOFT_OR_HARD(_system, _isc) \ - if (recv_errno == _system) { \ - if (sock->connected) { \ - dev->result = _isc; \ - inc_stats(sock->manager->stats, \ - sock->statsindex[STATID_RECVFAIL]); \ - return (DOIO_HARD); \ - } \ - return (DOIO_SOFT); \ - } -#define ALWAYS_HARD(_system, _isc) \ - if (recv_errno == _system) { \ - dev->result = _isc; \ - inc_stats(sock->manager->stats, \ - sock->statsindex[STATID_RECVFAIL]); \ - return (DOIO_HARD); \ - } - - SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); - SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH); - SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); - SOFT_OR_HARD(EHOSTDOWN, ISC_R_HOSTDOWN); - SOFT_OR_HARD(ENOBUFS, ISC_R_NORESOURCES); - /* - * Older operating systems may still return EPROTO in some - * situations, for example when receiving ICMP/ICMPv6 errors. - * A real life scenario is when ICMPv6 returns code 5 or 6. - * These codes are introduced in RFC 4443 from March 2006, - * and the document obsoletes RFC 1885. But unfortunately not - * all operating systems have caught up with the new standard - * (in 2020) and thus a generic protocol error is returned. - */ - SOFT_OR_HARD(EPROTO, ISC_R_HOSTUNREACH); - /* Should never get this one but it was seen. */ -#ifdef ENOPROTOOPT - SOFT_OR_HARD(ENOPROTOOPT, ISC_R_HOSTUNREACH); -#endif /* ifdef ENOPROTOOPT */ - SOFT_OR_HARD(EINVAL, ISC_R_HOSTUNREACH); - -#undef SOFT_OR_HARD -#undef ALWAYS_HARD - - dev->result = isc__errno2result(recv_errno); - inc_stats(sock->manager->stats, - sock->statsindex[STATID_RECVFAIL]); - return (DOIO_HARD); - } - - /* - * On TCP and UNIX sockets, zero length reads indicate EOF, - * while on UDP sockets, zero length reads are perfectly valid, - * although strange. 
- */ - switch (sock->type) { - case isc_sockettype_tcp: - case isc_sockettype_unix: - if (cc == 0) { - return (DOIO_EOF); - } - break; - case isc_sockettype_udp: - case isc_sockettype_raw: - break; - default: - INSIST(0); - ISC_UNREACHABLE(); - } - - if (sock->type == isc_sockettype_udp) { - dev->address.length = msghdr.msg_namelen; - if (isc_sockaddr_getport(&dev->address) == 0) { - if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { - socket_log(sock, &dev->address, IOEVENT, - "dropping source port zero packet"); - } - return (DOIO_SOFT); - } - /* - * Simulate a firewall blocking UDP responses bigger than - * 'maxudp' bytes. - */ - if (sock->manager->maxudp != 0 && - cc > (int)sock->manager->maxudp) { - return (DOIO_SOFT); - } - } - - socket_log(sock, &dev->address, IOEVENT, "packet received correctly"); - - /* - * Overflow bit detection. If we received MORE bytes than we should, - * this indicates an overflow situation. Set the flag in the - * dev entry and adjust how much we read by one. - */ -#ifdef ISC_PLATFORM_RECVOVERFLOW - if ((sock->type == isc_sockettype_udp) && ((size_t)cc > read_count)) { - dev->attributes |= ISC_SOCKEVENTATTR_TRUNC; - cc--; - } -#endif /* ifdef ISC_PLATFORM_RECVOVERFLOW */ - - /* - * If there are control messages attached, run through them and pull - * out the interesting bits. - */ - process_cmsg(sock, &msghdr, dev); - - /* - * update the buffers (if any) and the i/o count - */ - dev->n += cc; - - /* - * If we read less than we expected, update counters, - * and let the upper layer poke the descriptor. - */ - if (((size_t)cc != read_count) && (dev->n < dev->minimum)) { - return (DOIO_SOFT); - } - - /* - * Full reads are posted, or partials if partials are ok. - */ - dev->result = ISC_R_SUCCESS; - return (DOIO_SUCCESS); -} - -/* - * Returns: - * DOIO_SUCCESS The operation succeeded. dev->result contains - * ISC_R_SUCCESS. - * - * DOIO_HARD A hard or unexpected I/O error was encountered. - * dev->result contains the appropriate error. 
- * - * DOIO_SOFT A soft I/O error was encountered. No senddone - * event was sent. The operation should be retried. - * - * No other return values are possible. - */ -static int -doio_send(isc_socket_t *sock, isc_socketevent_t *dev) { - int cc; - struct iovec iov[MAXSCATTERGATHER_SEND]; - size_t write_count; - struct msghdr msghdr; - char addrbuf[ISC_SOCKADDR_FORMATSIZE]; - int attempts = 0; - int send_errno; - char strbuf[ISC_STRERRORSIZE]; - char cmsgbuf[SENDCMSGBUFLEN] = { 0 }; - - build_msghdr_send(sock, cmsgbuf, dev, &msghdr, iov, &write_count); - -resend: - if (sock->type == isc_sockettype_udp && sock->manager->maxudp != 0 && - write_count > sock->manager->maxudp) - { - cc = write_count; - } else { - cc = sendmsg(sock->fd, &msghdr, 0); - } - send_errno = errno; - - /* - * Check for error or block condition. - */ - if (cc < 0) { - if (send_errno == EINTR && ++attempts < NRETRIES) { - goto resend; - } - - if (SOFT_ERROR(send_errno)) { - if (errno == EWOULDBLOCK || errno == EAGAIN) { - dev->result = ISC_R_WOULDBLOCK; - } - return (DOIO_SOFT); - } - -#define SOFT_OR_HARD(_system, _isc) \ - if (send_errno == _system) { \ - if (sock->connected) { \ - dev->result = _isc; \ - inc_stats(sock->manager->stats, \ - sock->statsindex[STATID_SENDFAIL]); \ - return (DOIO_HARD); \ - } \ - return (DOIO_SOFT); \ - } -#define ALWAYS_HARD(_system, _isc) \ - if (send_errno == _system) { \ - dev->result = _isc; \ - inc_stats(sock->manager->stats, \ - sock->statsindex[STATID_SENDFAIL]); \ - return (DOIO_HARD); \ - } - - SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); - ALWAYS_HARD(EACCES, ISC_R_NOPERM); - ALWAYS_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); - ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); - ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); -#ifdef EHOSTDOWN - ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH); -#endif /* ifdef EHOSTDOWN */ - ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH); - SOFT_OR_HARD(ENOBUFS, ISC_R_NORESOURCES); - ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH); - 
ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED); - ALWAYS_HARD(ECONNRESET, ISC_R_CONNECTIONRESET); - -#undef SOFT_OR_HARD -#undef ALWAYS_HARD - - /* - * The other error types depend on whether or not the - * socket is UDP or TCP. If it is UDP, some errors - * that we expect to be fatal under TCP are merely - * annoying, and are really soft errors. - * - * However, these soft errors are still returned as - * a status. - */ - isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf)); - strerror_r(send_errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_send: %s: %s", - addrbuf, strbuf); - dev->result = isc__errno2result(send_errno); - inc_stats(sock->manager->stats, - sock->statsindex[STATID_SENDFAIL]); - return (DOIO_HARD); - } - - if (cc == 0) { - inc_stats(sock->manager->stats, - sock->statsindex[STATID_SENDFAIL]); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "doio_send: send() returned 0"); - } - - /* - * If we write less than we expected, update counters, poke. - */ - dev->n += cc; - if ((size_t)cc != write_count) { - return (DOIO_SOFT); - } - - /* - * Exactly what we wanted to write. We're done with this - * entry. Post its completion event. - */ - dev->result = ISC_R_SUCCESS; - return (DOIO_SUCCESS); -} - -/* - * Kill. - * - * Caller must ensure that the socket is not locked and no external - * references exist. - */ -static void -socketclose(isc__socketthread_t *thread, isc_socket_t *sock, int fd) { - int lockid = FDLOCK_ID(fd); - /* - * No one has this socket open, so the watcher doesn't have to be - * poked, and the socket doesn't have to be locked. 
- */ - LOCK(&thread->fdlock[lockid]); - thread->fds[fd] = NULL; - thread->fdstate[fd] = CLOSE_PENDING; - UNLOCK(&thread->fdlock[lockid]); - select_poke(thread->manager, thread->threadid, fd, SELECT_POKE_CLOSE); - - inc_stats(thread->manager->stats, sock->statsindex[STATID_CLOSE]); - - LOCK(&sock->lock); - if (sock->active == 1) { - dec_stats(thread->manager->stats, - sock->statsindex[STATID_ACTIVE]); - sock->active = 0; - } - UNLOCK(&sock->lock); - - /* - * update manager->maxfd here (XXX: this should be implemented more - * efficiently) - */ -#ifdef USE_SELECT - LOCK(&thread->manager->lock); - if (thread->maxfd == fd) { - int i; - - thread->maxfd = 0; - for (i = fd - 1; i >= 0; i--) { - lockid = FDLOCK_ID(i); - - LOCK(&thread->fdlock[lockid]); - if (thread->fdstate[i] == MANAGED) { - thread->maxfd = i; - UNLOCK(&thread->fdlock[lockid]); - break; - } - UNLOCK(&thread->fdlock[lockid]); - } - if (thread->maxfd < thread->pipe_fds[0]) { - thread->maxfd = thread->pipe_fds[0]; - } - } - - UNLOCK(&thread->manager->lock); -#endif /* USE_SELECT */ -} - -static void -destroy(isc_socket_t **sockp) { - int fd = 0; - isc_socket_t *sock = *sockp; - isc_socketmgr_t *manager = sock->manager; - isc__socketthread_t *thread = NULL; - - socket_log(sock, NULL, CREATION, "destroying"); - - isc_refcount_destroy(&sock->references); - - LOCK(&sock->lock); - INSIST(ISC_LIST_EMPTY(sock->connect_list)); - INSIST(ISC_LIST_EMPTY(sock->accept_list)); - INSIST(ISC_LIST_EMPTY(sock->recv_list)); - INSIST(ISC_LIST_EMPTY(sock->send_list)); - INSIST(sock->fd >= -1 && sock->fd < (int)manager->maxsocks); - - if (sock->fd >= 0) { - fd = sock->fd; - thread = &manager->threads[sock->threadid]; - sock->fd = -1; - sock->threadid = -1; - } - UNLOCK(&sock->lock); - - if (fd > 0) { - socketclose(thread, sock, fd); - } - - LOCK(&manager->lock); - - ISC_LIST_UNLINK(manager->socklist, sock, link); - - if (ISC_LIST_EMPTY(manager->socklist)) { - SIGNAL(&manager->shutdown_ok); - } - - /* can't unlock manager as its 
memory context is still used */ - free_socket(sockp); - - UNLOCK(&manager->lock); -} - -static isc_result_t -allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, - isc_socket_t **socketp) { - isc_socket_t *sock; - - sock = isc_mem_get(manager->mctx, sizeof(*sock)); - - sock->magic = 0; - isc_refcount_init(&sock->references, 0); - - sock->manager = manager; - sock->type = type; - sock->fd = -1; - sock->threadid = -1; - sock->dscp = 0; /* TOS/TCLASS is zero until set. */ - sock->statsindex = NULL; - sock->active = 0; - - ISC_LINK_INIT(sock, link); - - memset(sock->name, 0, sizeof(sock->name)); - sock->tag = NULL; - - /* - * Set up list of readers and writers to be initially empty. - */ - ISC_LIST_INIT(sock->recv_list); - ISC_LIST_INIT(sock->send_list); - ISC_LIST_INIT(sock->accept_list); - ISC_LIST_INIT(sock->connect_list); - - sock->listener = 0; - sock->connected = 0; - sock->connecting = 0; - sock->bound = 0; - sock->pktdscp = 0; - - /* - * Initialize the lock. - */ - isc_mutex_init(&sock->lock); - - sock->magic = SOCKET_MAGIC; - *socketp = sock; - - return (ISC_R_SUCCESS); -} - -/* - * This event requires that the various lists be empty, that the reference - * count be 1, and that the magic number is valid. The other socket bits, - * like the lock, must be initialized as well. The fd associated must be - * marked as closed, by setting it to -1 on close, or this routine will - * also close the socket. 
- */ -static void -free_socket(isc_socket_t **socketp) { - isc_socket_t *sock = *socketp; - *socketp = NULL; - - INSIST(VALID_SOCKET(sock)); - isc_refcount_destroy(&sock->references); - LOCK(&sock->lock); - INSIST(!sock->connecting); - INSIST(ISC_LIST_EMPTY(sock->recv_list)); - INSIST(ISC_LIST_EMPTY(sock->send_list)); - INSIST(ISC_LIST_EMPTY(sock->accept_list)); - INSIST(ISC_LIST_EMPTY(sock->connect_list)); - INSIST(!ISC_LINK_LINKED(sock, link)); - UNLOCK(&sock->lock); - - sock->magic = 0; - - isc_mutex_destroy(&sock->lock); - - isc_mem_put(sock->manager->mctx, sock, sizeof(*sock)); -} - -#if defined(SET_RCVBUF) -static isc_once_t rcvbuf_once = ISC_ONCE_INIT; -static int rcvbuf = ISC_RECV_BUFFER_SIZE; - -static void -set_rcvbuf(void) { - int fd; - int max = rcvbuf, min; - socklen_t len; - - fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (fd == -1) { - switch (errno) { - case EPROTONOSUPPORT: - case EPFNOSUPPORT: - case EAFNOSUPPORT: - /* - * Linux 2.2 (and maybe others) return EINVAL instead of - * EAFNOSUPPORT. 
- */ - case EINVAL: - fd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); - break; - } - } - if (fd == -1) { - return; - } - - len = sizeof(min); - if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *)&min, &len) == 0 && - min < rcvbuf) - { - again: - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *)&rcvbuf, - sizeof(rcvbuf)) == -1) - { - if (errno == ENOBUFS && rcvbuf > min) { - max = rcvbuf - 1; - rcvbuf = (rcvbuf + min) / 2; - goto again; - } else { - rcvbuf = min; - goto cleanup; - } - } else { - min = rcvbuf; - } - if (min != max) { - rcvbuf = max; - goto again; - } - } -cleanup: - close(fd); -} -#endif /* ifdef SO_RCVBUF */ - -#if defined(SET_SNDBUF) -static isc_once_t sndbuf_once = ISC_ONCE_INIT; -static int sndbuf = ISC_SEND_BUFFER_SIZE; - -static void -set_sndbuf(void) { - int fd; - int max = sndbuf, min; - socklen_t len; - - fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (fd == -1) { - switch (errno) { - case EPROTONOSUPPORT: - case EPFNOSUPPORT: - case EAFNOSUPPORT: - /* - * Linux 2.2 (and maybe others) return EINVAL instead of - * EAFNOSUPPORT. 
- */ - case EINVAL: - fd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); - break; - } - } - if (fd == -1) { - return; - } - - len = sizeof(min); - if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *)&min, &len) == 0 && - min < sndbuf) - { - again: - if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *)&sndbuf, - sizeof(sndbuf)) == -1) - { - if (errno == ENOBUFS && sndbuf > min) { - max = sndbuf - 1; - sndbuf = (sndbuf + min) / 2; - goto again; - } else { - sndbuf = min; - goto cleanup; - } - } else { - min = sndbuf; - } - if (min != max) { - sndbuf = max; - goto again; - } - } -cleanup: - close(fd); -} -#endif /* ifdef SO_SNDBUF */ - -static void -use_min_mtu(isc_socket_t *sock) { -#if !defined(IPV6_USE_MIN_MTU) && !defined(IPV6_MTU) - UNUSED(sock); -#endif /* if !defined(IPV6_USE_MIN_MTU) && !defined(IPV6_MTU) */ -#ifdef IPV6_USE_MIN_MTU - /* use minimum MTU */ - if (sock->pf == AF_INET6) { - int on = 1; - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU, - (void *)&on, sizeof(on)); - } -#endif /* ifdef IPV6_USE_MIN_MTU */ -#if defined(IPV6_MTU) - /* - * Use minimum MTU on IPv6 sockets. 
- */ - if (sock->pf == AF_INET6) { - int mtu = 1280; - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_MTU, &mtu, - sizeof(mtu)); - } -#endif /* if defined(IPV6_MTU) */ -} - -static void -set_tcp_maxseg(isc_socket_t *sock, int size) { -#ifdef TCP_MAXSEG - if (sock->type == isc_sockettype_tcp) { - (void)setsockopt(sock->fd, IPPROTO_TCP, TCP_MAXSEG, - (void *)&size, sizeof(size)); - } -#endif /* ifdef TCP_MAXSEG */ -} - -static void -set_ip_disable_pmtud(isc_socket_t *sock) { - /* - * Disable Path MTU Discover on IP packets - */ - if (sock->pf == AF_INET6) { -#if defined(IPV6_DONTFRAG) - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_DONTFRAG, - &(int){ 0 }, sizeof(int)); -#endif -#if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT) - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, - &(int){ IP_PMTUDISC_OMIT }, sizeof(int)); -#endif - } else if (sock->pf == AF_INET) { -#if defined(IP_DONTFRAG) - (void)setsockopt(sock->fd, IPPROTO_IP, IP_DONTFRAG, &(int){ 0 }, - sizeof(int)); -#endif -#if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT) - (void)setsockopt(sock->fd, IPPROTO_IP, IP_MTU_DISCOVER, - &(int){ IP_PMTUDISC_OMIT }, sizeof(int)); -#endif - } -} - -static isc_result_t -opensocket(isc_socketmgr_t *manager, isc_socket_t *sock) { - isc_result_t result; - char strbuf[ISC_STRERRORSIZE]; - const char *err = "socket"; - int tries = 0; -#if defined(USE_CMSG) || defined(SO_NOSIGPIPE) - int on = 1; -#endif /* if defined(USE_CMSG) || defined(SO_NOSIGPIPE) */ -#if defined(SET_RCVBUF) || defined(SET_SNDBUF) - socklen_t optlen; - int size = 0; -#endif - -again: - switch (sock->type) { - case isc_sockettype_udp: - sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP); - break; - case isc_sockettype_tcp: - sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); - break; - case isc_sockettype_unix: - sock->fd = socket(sock->pf, SOCK_STREAM, 0); - break; - case isc_sockettype_raw: - errno = EPFNOSUPPORT; - /* - * PF_ROUTE is a alias for PF_NETLINK on linux. 
- */ -#if defined(PF_ROUTE) - if (sock->fd == -1 && sock->pf == PF_ROUTE) { -#ifdef NETLINK_ROUTE - sock->fd = socket(sock->pf, SOCK_RAW, NETLINK_ROUTE); -#else /* ifdef NETLINK_ROUTE */ - sock->fd = socket(sock->pf, SOCK_RAW, 0); -#endif /* ifdef NETLINK_ROUTE */ - if (sock->fd != -1) { -#ifdef NETLINK_ROUTE - struct sockaddr_nl sa; - int n; - - /* - * Do an implicit bind. - */ - memset(&sa, 0, sizeof(sa)); - sa.nl_family = AF_NETLINK; - sa.nl_groups = RTMGRP_IPV4_IFADDR | - RTMGRP_IPV6_IFADDR; - n = bind(sock->fd, (struct sockaddr *)&sa, - sizeof(sa)); - if (n < 0) { - close(sock->fd); - sock->fd = -1; - } -#endif /* ifdef NETLINK_ROUTE */ - sock->bound = 1; - } - } -#endif /* if defined(PF_ROUTE) */ - break; - } - if (sock->fd == -1 && errno == EINTR && tries++ < 42) { - goto again; - } - -#ifdef F_DUPFD - /* - * Leave a space for stdio and TCP to work in. - */ - if (manager->reserved != 0 && sock->type == isc_sockettype_udp && - sock->fd >= 0 && sock->fd < manager->reserved) - { - int newfd, tmp; - newfd = fcntl(sock->fd, F_DUPFD, manager->reserved); - tmp = errno; - (void)close(sock->fd); - errno = tmp; - sock->fd = newfd; - err = "isc_socket_create: fcntl/reserved"; - } else if (sock->fd >= 0 && sock->fd < 20) { - int newfd, tmp; - newfd = fcntl(sock->fd, F_DUPFD, 20); - tmp = errno; - (void)close(sock->fd); - errno = tmp; - sock->fd = newfd; - err = "isc_socket_create: fcntl"; - } -#endif /* ifdef F_DUPFD */ - - if (sock->fd >= (int)manager->maxsocks) { - (void)close(sock->fd); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "socket: file descriptor exceeds limit (%d/%u)", - sock->fd, manager->maxsocks); - inc_stats(manager->stats, sock->statsindex[STATID_OPENFAIL]); - return (ISC_R_NORESOURCES); - } - - if (sock->fd < 0) { - switch (errno) { - case EMFILE: - case ENFILE: - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "%s: 
%s", err, strbuf); - /* fallthrough */ - case ENOBUFS: - inc_stats(manager->stats, - sock->statsindex[STATID_OPENFAIL]); - return (ISC_R_NORESOURCES); - - case EPROTONOSUPPORT: - case EPFNOSUPPORT: - case EAFNOSUPPORT: - /* - * Linux 2.2 (and maybe others) return EINVAL instead of - * EAFNOSUPPORT. - */ - case EINVAL: - inc_stats(manager->stats, - sock->statsindex[STATID_OPENFAIL]); - return (ISC_R_FAMILYNOSUPPORT); - - default: - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "%s() failed: %s", - err, strbuf); - inc_stats(manager->stats, - sock->statsindex[STATID_OPENFAIL]); - return (ISC_R_UNEXPECTED); - } - } - - result = make_nonblock(sock->fd); - if (result != ISC_R_SUCCESS) { - (void)close(sock->fd); - inc_stats(manager->stats, sock->statsindex[STATID_OPENFAIL]); - return (result); - } - -#ifdef SO_NOSIGPIPE - if (setsockopt(sock->fd, SOL_SOCKET, SO_NOSIGPIPE, (void *)&on, - sizeof(on)) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, SO_NOSIGPIPE) failed: %s", - sock->fd, strbuf); - /* Press on... */ - } -#endif /* ifdef SO_NOSIGPIPE */ - - /* - * Use minimum mtu if possible. - */ - if (sock->type == isc_sockettype_tcp && sock->pf == AF_INET6) { - use_min_mtu(sock); - set_tcp_maxseg(sock, 1280 - 20 - 40); /* 1280 - TCP - IPV6 */ - } - -#if defined(USE_CMSG) || defined(SET_RCVBUF) || defined(SET_SNDBUF) - if (sock->type == isc_sockettype_udp) { -#if defined(USE_CMSG) -#if defined(SO_TIMESTAMP) - if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP, (void *)&on, - sizeof(on)) < 0 && - errno != ENOPROTOOPT) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, SO_TIMESTAMP) failed: " - "%s", - sock->fd, strbuf); - /* Press on... 
*/ - } -#endif /* SO_TIMESTAMP */ - -#ifdef IPV6_RECVPKTINFO - /* RFC 3542 */ - if ((sock->pf == AF_INET6) && - (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, - (void *)&on, sizeof(on)) < 0)) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_RECVPKTINFO) " - "failed: %s", - sock->fd, strbuf); - } -#else /* ifdef IPV6_RECVPKTINFO */ - /* RFC 2292 */ - if ((sock->pf == AF_INET6) && - (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO, - (void *)&on, sizeof(on)) < 0)) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_PKTINFO) failed: " - "%s", - sock->fd, strbuf); - } -#endif /* IPV6_RECVPKTINFO */ -#endif /* defined(USE_CMSG) */ - -#if defined(SET_RCVBUF) - optlen = sizeof(size); - if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, (void *)&size, - &optlen) == 0 && - size < rcvbuf) - { - RUNTIME_CHECK(isc_once_do(&rcvbuf_once, set_rcvbuf) == - ISC_R_SUCCESS); - if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, - (void *)&rcvbuf, sizeof(rcvbuf)) == -1) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, SO_RCVBUF, " - "%d) failed: %s", - sock->fd, rcvbuf, strbuf); - } - } -#endif /* if defined(SET_RCVBUF) */ - -#if defined(SET_SNDBUF) - optlen = sizeof(size); - if (getsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, (void *)&size, - &optlen) == 0 && - size < sndbuf) - { - RUNTIME_CHECK(isc_once_do(&sndbuf_once, set_sndbuf) == - ISC_R_SUCCESS); - if (setsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, - (void *)&sndbuf, sizeof(sndbuf)) == -1) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, SO_SNDBUF, " - "%d) failed: %s", - sock->fd, sndbuf, strbuf); - } - } -#endif /* if defined(SO_SNDBUF) */ - } -#ifdef IPV6_RECVTCLASS - if ((sock->pf == AF_INET6) && - (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVTCLASS, (void *)&on, - sizeof(on)) < 0)) - { - 
strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_RECVTCLASS) " - "failed: %s", - sock->fd, strbuf); - } -#endif /* ifdef IPV6_RECVTCLASS */ -#ifdef IP_RECVTOS - if ((sock->pf == AF_INET) && - (setsockopt(sock->fd, IPPROTO_IP, IP_RECVTOS, (void *)&on, - sizeof(on)) < 0)) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IP_RECVTOS) " - "failed: %s", - sock->fd, strbuf); - } -#endif /* ifdef IP_RECVTOS */ -#endif /* defined(USE_CMSG) || defined(SET_RCVBUF) || defined(SET_SNDBUF) */ - - set_ip_disable_pmtud(sock); - - inc_stats(manager->stats, sock->statsindex[STATID_OPEN]); - if (sock->active == 0) { - inc_stats(manager->stats, sock->statsindex[STATID_ACTIVE]); - sock->active = 1; - } - - return (ISC_R_SUCCESS); -} - -/* - * Create a 'type' socket, managed by 'manager'. Events will be posted to - * 'task' and when dispatched 'action' will be called with 'arg' as the arg - * value. The new socket is returned in 'socketp'. - */ -static isc_result_t -socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, - isc_socket_t **socketp) { - isc_socket_t *sock = NULL; - isc__socketthread_t *thread; - isc_result_t result; - int lockid; - - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(socketp != NULL && *socketp == NULL); - - result = allocate_socket(manager, type, &sock); - if (result != ISC_R_SUCCESS) { - return (result); - } - - switch (sock->type) { - case isc_sockettype_udp: - sock->statsindex = (pf == AF_INET) ? udp4statsindex - : udp6statsindex; -#define DCSPPKT(pf) ((pf == AF_INET) ? ISC_NET_DSCPPKTV4 : ISC_NET_DSCPPKTV6) - sock->pktdscp = (isc_net_probedscp() & DCSPPKT(pf)) != 0; - break; - case isc_sockettype_tcp: - sock->statsindex = (pf == AF_INET) ? 
tcp4statsindex - : tcp6statsindex; - break; - case isc_sockettype_unix: - sock->statsindex = unixstatsindex; - break; - case isc_sockettype_raw: - sock->statsindex = rawstatsindex; - break; - default: - INSIST(0); - ISC_UNREACHABLE(); - } - - sock->pf = pf; - - result = opensocket(manager, sock); - if (result != ISC_R_SUCCESS) { - free_socket(&sock); - return (result); - } - - if (sock->fd == -1) { - abort(); - } - sock->threadid = gen_threadid(sock); - isc_refcount_increment0(&sock->references); - thread = &manager->threads[sock->threadid]; - *socketp = sock; - - /* - * Note we don't have to lock the socket like we normally would because - * there are no external references to it yet. - */ - - lockid = FDLOCK_ID(sock->fd); - LOCK(&thread->fdlock[lockid]); - thread->fds[sock->fd] = sock; - thread->fdstate[sock->fd] = MANAGED; -#if defined(USE_EPOLL) - thread->epoll_events[sock->fd] = 0; -#endif /* if defined(USE_EPOLL) */ -#ifdef USE_DEVPOLL - INSIST(thread->fdpollinfo[sock->fd].want_read == 0 && - thread->fdpollinfo[sock->fd].want_write == 0); -#endif /* ifdef USE_DEVPOLL */ - UNLOCK(&thread->fdlock[lockid]); - - LOCK(&manager->lock); - ISC_LIST_APPEND(manager->socklist, sock, link); -#ifdef USE_SELECT - if (thread->maxfd < sock->fd) { - thread->maxfd = sock->fd; - } -#endif /* ifdef USE_SELECT */ - UNLOCK(&manager->lock); - - socket_log(sock, NULL, CREATION, "created"); - - return (ISC_R_SUCCESS); -} - -/*% - * Create a new 'type' socket managed by 'manager'. Events - * will be posted to 'task' and when dispatched 'action' will be - * called with 'arg' as the arg value. The new socket is returned - * in 'socketp'. 
- */ -isc_result_t -isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, - isc_socket_t **socketp) { - return (socket_create(manager, pf, type, socketp)); -} - -isc_result_t -isc_socket_open(isc_socket_t *sock) { - isc_result_t result; - isc__socketthread_t *thread; - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - - REQUIRE(isc_refcount_current(&sock->references) >= 1); - REQUIRE(sock->fd == -1); - REQUIRE(sock->threadid == -1); - - result = opensocket(sock->manager, sock); - - UNLOCK(&sock->lock); - - if (result != ISC_R_SUCCESS) { - sock->fd = -1; - } else { - sock->threadid = gen_threadid(sock); - thread = &sock->manager->threads[sock->threadid]; - int lockid = FDLOCK_ID(sock->fd); - - LOCK(&thread->fdlock[lockid]); - thread->fds[sock->fd] = sock; - thread->fdstate[sock->fd] = MANAGED; -#if defined(USE_EPOLL) - thread->epoll_events[sock->fd] = 0; -#endif /* if defined(USE_EPOLL) */ -#ifdef USE_DEVPOLL - INSIST(thread->fdpollinfo[sock->fd].want_read == 0 && - thread->fdpollinfo[sock->fd].want_write == 0); -#endif /* ifdef USE_DEVPOLL */ - UNLOCK(&thread->fdlock[lockid]); - -#ifdef USE_SELECT - LOCK(&sock->manager->lock); - if (thread->maxfd < sock->fd) { - thread->maxfd = sock->fd; - } - UNLOCK(&sock->manager->lock); -#endif /* ifdef USE_SELECT */ - } - - return (result); -} - -/* - * Attach to a socket. Caller must explicitly detach when it is done. - */ -void -isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) { - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(socketp != NULL && *socketp == NULL); - - int old_refs = isc_refcount_increment(&sock->references); - REQUIRE(old_refs > 0); - - *socketp = sock; -} - -/* - * Dereference a socket. If this is the last reference to it, clean things - * up by destroying the socket. 
- */ -void -isc_socket_detach(isc_socket_t **socketp) { - isc_socket_t *sock; - - REQUIRE(socketp != NULL); - sock = *socketp; - REQUIRE(VALID_SOCKET(sock)); - if (isc_refcount_decrement(&sock->references) == 1) { - destroy(&sock); - } - - *socketp = NULL; -} - -isc_result_t -isc_socket_close(isc_socket_t *sock) { - int fd; - isc_socketmgr_t *manager; - isc__socketthread_t *thread; - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - - REQUIRE(sock->fd >= 0 && sock->fd < (int)sock->manager->maxsocks); - - INSIST(!sock->connecting); - INSIST(ISC_LIST_EMPTY(sock->recv_list)); - INSIST(ISC_LIST_EMPTY(sock->send_list)); - INSIST(ISC_LIST_EMPTY(sock->accept_list)); - INSIST(ISC_LIST_EMPTY(sock->connect_list)); - - manager = sock->manager; - thread = &manager->threads[sock->threadid]; - fd = sock->fd; - sock->fd = -1; - sock->threadid = -1; - - memset(sock->name, 0, sizeof(sock->name)); - sock->tag = NULL; - sock->listener = 0; - sock->connected = 0; - sock->connecting = 0; - sock->bound = 0; - isc_sockaddr_any(&sock->peer_address); - - UNLOCK(&sock->lock); - - socketclose(thread, sock, fd); - - return (ISC_R_SUCCESS); -} - -/* - * Dequeue an item off the given socket's read queue, set the result code - * in the done event to the one provided, and send it to the task it was - * destined for. - * - * If the event to be sent is on a list, remove it before sending. If - * asked to, send and detach from the socket as well. - * - * Caller must have the socket locked if the event is attached to the socket. 
- */ -static void -send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) { - isc_task_t *task; - - task = (*dev)->ev_sender; - - (*dev)->ev_sender = sock; - - if (ISC_LINK_LINKED(*dev, ev_link)) { - ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link); - } - - if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) != 0) { - isc_task_sendtoanddetach(&task, (isc_event_t **)dev, - sock->threadid); - } else { - isc_task_sendto(task, (isc_event_t **)dev, sock->threadid); - } -} - -/* - * See comments for send_recvdone_event() above. - * - * Caller must have the socket locked if the event is attached to the socket. - */ -static void -send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) { - isc_task_t *task; - - INSIST(dev != NULL && *dev != NULL); - - task = (*dev)->ev_sender; - (*dev)->ev_sender = sock; - - if (ISC_LINK_LINKED(*dev, ev_link)) { - ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link); - } - - if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) != 0) { - isc_task_sendtoanddetach(&task, (isc_event_t **)dev, - sock->threadid); - } else { - isc_task_sendto(task, (isc_event_t **)dev, sock->threadid); - } -} - -/* - * See comments for send_recvdone_event() above. - * - * Caller must have the socket locked if the event is attached to the socket. - */ -static void -send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **dev) { - isc_task_t *task; - - INSIST(dev != NULL && *dev != NULL); - - task = (*dev)->ev_sender; - (*dev)->ev_sender = sock; - - if (ISC_LINK_LINKED(*dev, ev_link)) { - ISC_LIST_DEQUEUE(sock->connect_list, *dev, ev_link); - } - - isc_task_sendtoanddetach(&task, (isc_event_t **)dev, sock->threadid); -} - -/* - * Call accept() on a socket, to get the new file descriptor. The listen - * socket is used as a prototype to create a new isc_socket_t. The new - * socket has one outstanding reference. The task receiving the event - * will be detached from just after the event is delivered. 
- * - * On entry to this function, the event delivered is the internal - * readable event, and the first item on the accept_list should be - * the done event we want to send. If the list is empty, this is a no-op, - * so just unlock and return. - */ -static void -internal_accept(isc_socket_t *sock) { - isc_socketmgr_t *manager; - isc__socketthread_t *thread, *nthread; - isc_socket_newconnev_t *dev; - isc_task_t *task; - socklen_t addrlen; - int fd; - isc_result_t result = ISC_R_SUCCESS; - char strbuf[ISC_STRERRORSIZE]; - const char *err = "accept"; - - INSIST(VALID_SOCKET(sock)); - REQUIRE(sock->fd >= 0); - - socket_log(sock, NULL, TRACE, "internal_accept called, locked socket"); - - manager = sock->manager; - INSIST(VALID_MANAGER(manager)); - thread = &manager->threads[sock->threadid]; - - INSIST(sock->listener); - - /* - * Get the first item off the accept list. - * If it is empty, unlock the socket and return. - */ - dev = ISC_LIST_HEAD(sock->accept_list); - if (dev == NULL) { - unwatch_fd(thread, sock->fd, SELECT_POKE_ACCEPT); - UNLOCK(&sock->lock); - return; - } - - /* - * Try to accept the new connection. If the accept fails with - * EAGAIN or EINTR, simply poke the watcher to watch this socket - * again. Also ignore ECONNRESET, which has been reported to - * be spuriously returned on Linux 2.2.19 although it is not - * a documented error for accept(). ECONNABORTED has been - * reported for Solaris 8. The rest are thrown in not because - * we have seen them but because they are ignored by other - * daemons such as BIND 8 and Apache. - */ - - addrlen = sizeof(NEWCONNSOCK(dev)->peer_address.type); - memset(&NEWCONNSOCK(dev)->peer_address.type, 0, addrlen); - fd = accept(sock->fd, &NEWCONNSOCK(dev)->peer_address.type.sa, - (void *)&addrlen); - -#ifdef F_DUPFD - /* - * Leave a space for stdio to work in. 
- */ - if (fd >= 0 && fd < 20) { - int newfd, tmp; - newfd = fcntl(fd, F_DUPFD, 20); - tmp = errno; - (void)close(fd); - errno = tmp; - fd = newfd; - err = "accept/fcntl"; - } -#endif /* ifdef F_DUPFD */ - - if (fd < 0) { - if (SOFT_ERROR(errno)) { - goto soft_error; - } - switch (errno) { - case ENFILE: - case EMFILE: - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "%s: too many open file descriptors", - err); - goto soft_error; - - case ENOBUFS: - case ENOMEM: - case ECONNRESET: - case ECONNABORTED: - case EHOSTUNREACH: - case EHOSTDOWN: - case ENETUNREACH: - case ENETDOWN: - case ECONNREFUSED: -#ifdef EPROTO - case EPROTO: -#endif /* ifdef EPROTO */ -#ifdef ENONET - case ENONET: -#endif /* ifdef ENONET */ - goto soft_error; - default: - break; - } - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_accept: %s() failed: %s", err, - strbuf); - fd = -1; - result = ISC_R_UNEXPECTED; - } else { - if (addrlen == 0U) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_accept(): " - "accept() failed to return " - "remote address"); - - (void)close(fd); - goto soft_error; - } else if (NEWCONNSOCK(dev)->peer_address.type.sa.sa_family != - sock->pf) { - UNEXPECTED_ERROR( - __FILE__, __LINE__, - "internal_accept(): " - "accept() returned peer address " - "family %u (expected %u)", - NEWCONNSOCK(dev)->peer_address.type.sa.sa_family, - sock->pf); - (void)close(fd); - goto soft_error; - } else if (fd >= (int)manager->maxsocks) { - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "accept: file descriptor exceeds limit " - "(%d/%u)", - fd, manager->maxsocks); - (void)close(fd); - goto soft_error; - } - } - - if (fd != -1) { - NEWCONNSOCK(dev)->peer_address.length = addrlen; - NEWCONNSOCK(dev)->pf = sock->pf; - } - - /* - * Pull off the done event. 
- */ - ISC_LIST_UNLINK(sock->accept_list, dev, ev_link); - - /* - * Poke watcher if there are more pending accepts. - */ - if (ISC_LIST_EMPTY(sock->accept_list)) { - unwatch_fd(thread, sock->fd, SELECT_POKE_ACCEPT); - } - - if (fd != -1) { - result = make_nonblock(fd); - if (result != ISC_R_SUCCESS) { - (void)close(fd); - fd = -1; - } - } - - /* - * We need to unlock sock->lock now to be able to lock manager->lock - * without risking a deadlock with xmlstats. - */ - UNLOCK(&sock->lock); - - /* - * -1 means the new socket didn't happen. - */ - if (fd != -1) { - int lockid = FDLOCK_ID(fd); - - NEWCONNSOCK(dev)->fd = fd; - NEWCONNSOCK(dev)->threadid = gen_threadid(NEWCONNSOCK(dev)); - NEWCONNSOCK(dev)->bound = 1; - NEWCONNSOCK(dev)->connected = 1; - nthread = &manager->threads[NEWCONNSOCK(dev)->threadid]; - - /* - * We already hold a lock on one fdlock in accepting thread, - * we need to make sure that we don't double lock. - */ - bool same_bucket = (sock->threadid == - NEWCONNSOCK(dev)->threadid) && - (FDLOCK_ID(sock->fd) == lockid); - - /* - * Use minimum mtu if possible. - */ - use_min_mtu(NEWCONNSOCK(dev)); - set_tcp_maxseg(NEWCONNSOCK(dev), 1280 - 20 - 40); - - /* - * Ensure DSCP settings are inherited across accept. 
- */ - setdscp(NEWCONNSOCK(dev), sock->dscp); - - /* - * Save away the remote address - */ - dev->address = NEWCONNSOCK(dev)->peer_address; - - if (NEWCONNSOCK(dev)->active == 0) { - inc_stats(manager->stats, - NEWCONNSOCK(dev)->statsindex[STATID_ACTIVE]); - NEWCONNSOCK(dev)->active = 1; - } - - if (!same_bucket) { - LOCK(&nthread->fdlock[lockid]); - } - nthread->fds[fd] = NEWCONNSOCK(dev); - nthread->fdstate[fd] = MANAGED; -#if defined(USE_EPOLL) - nthread->epoll_events[fd] = 0; -#endif /* if defined(USE_EPOLL) */ - if (!same_bucket) { - UNLOCK(&nthread->fdlock[lockid]); - } - - LOCK(&manager->lock); - -#ifdef USE_SELECT - if (nthread->maxfd < fd) { - nthread->maxfd = fd; - } -#endif /* ifdef USE_SELECT */ - - socket_log(sock, &NEWCONNSOCK(dev)->peer_address, CREATION, - "accepted connection, new socket %p", - dev->newsocket); - - ISC_LIST_APPEND(manager->socklist, NEWCONNSOCK(dev), link); - - UNLOCK(&manager->lock); - - inc_stats(manager->stats, sock->statsindex[STATID_ACCEPT]); - } else { - inc_stats(manager->stats, sock->statsindex[STATID_ACCEPTFAIL]); - isc_refcount_decrementz(&NEWCONNSOCK(dev)->references); - free_socket((isc_socket_t **)&dev->newsocket); - } - - /* - * Fill in the done event details and send it off. - */ - dev->result = result; - task = dev->ev_sender; - dev->ev_sender = sock; - - isc_task_sendtoanddetach(&task, ISC_EVENT_PTR(&dev), sock->threadid); - return; - -soft_error: - watch_fd(thread, sock->fd, SELECT_POKE_ACCEPT); - UNLOCK(&sock->lock); - - inc_stats(manager->stats, sock->statsindex[STATID_ACCEPTFAIL]); - return; -} - -static void -internal_recv(isc_socket_t *sock) { - isc_socketevent_t *dev; - - INSIST(VALID_SOCKET(sock)); - REQUIRE(sock->fd >= 0); - - dev = ISC_LIST_HEAD(sock->recv_list); - if (dev == NULL) { - goto finish; - } - - socket_log(sock, NULL, IOEVENT, "internal_recv: event %p -> task %p", - dev, dev->ev_sender); - - /* - * Try to do as much I/O as possible on this socket. There are no - * limits here, currently. 
- */ - while (dev != NULL) { - switch (doio_recv(sock, dev)) { - case DOIO_SOFT: - goto finish; - - case DOIO_EOF: - /* - * read of 0 means the remote end was closed. - * Run through the event queue and dispatch all - * the events with an EOF result code. - */ - do { - dev->result = ISC_R_EOF; - send_recvdone_event(sock, &dev); - dev = ISC_LIST_HEAD(sock->recv_list); - } while (dev != NULL); - goto finish; - - case DOIO_SUCCESS: - case DOIO_HARD: - send_recvdone_event(sock, &dev); - break; - } - - dev = ISC_LIST_HEAD(sock->recv_list); - } - -finish: - if (ISC_LIST_EMPTY(sock->recv_list)) { - unwatch_fd(&sock->manager->threads[sock->threadid], sock->fd, - SELECT_POKE_READ); - } -} - -static void -internal_send(isc_socket_t *sock) { - isc_socketevent_t *dev; - - INSIST(VALID_SOCKET(sock)); - REQUIRE(sock->fd >= 0); - - dev = ISC_LIST_HEAD(sock->send_list); - if (dev == NULL) { - goto finish; - } - socket_log(sock, NULL, EVENT, "internal_send: event %p -> task %p", dev, - dev->ev_sender); - - /* - * Try to do as much I/O as possible on this socket. There are no - * limits here, currently. - */ - while (dev != NULL) { - switch (doio_send(sock, dev)) { - case DOIO_SOFT: - goto finish; - - case DOIO_HARD: - case DOIO_SUCCESS: - send_senddone_event(sock, &dev); - break; - } - - dev = ISC_LIST_HEAD(sock->send_list); - } - -finish: - if (ISC_LIST_EMPTY(sock->send_list)) { - unwatch_fd(&sock->manager->threads[sock->threadid], sock->fd, - SELECT_POKE_WRITE); - } -} - -/* - * Process read/writes on each fd here. Avoid locking - * and unlocking twice if both reads and writes are possible. - */ -static void -process_fd(isc__socketthread_t *thread, int fd, bool readable, bool writeable) { - isc_socket_t *sock; - int lockid = FDLOCK_ID(fd); - - /* - * If the socket is going to be closed, don't do more I/O. 
- */ - LOCK(&thread->fdlock[lockid]); - if (thread->fdstate[fd] == CLOSE_PENDING) { - UNLOCK(&thread->fdlock[lockid]); - - (void)unwatch_fd(thread, fd, SELECT_POKE_READ); - (void)unwatch_fd(thread, fd, SELECT_POKE_WRITE); - return; - } - - sock = thread->fds[fd]; - if (sock == NULL) { - UNLOCK(&thread->fdlock[lockid]); - return; - } - - LOCK(&sock->lock); - - if (sock->fd < 0) { - /* - * Sock is being closed - the final external reference - * is gone but it was not yet removed from event loop - * and fdstate[]/fds[] as destroy() is waiting on - * thread->fdlock[lockid] or sock->lock that we're holding. - * Just release the locks and bail. - */ - UNLOCK(&sock->lock); - UNLOCK(&thread->fdlock[lockid]); - return; - } - - REQUIRE(readable || writeable); - if (writeable) { - if (sock->connecting) { - internal_connect(sock); - } else { - internal_send(sock); - } - } - - if (readable) { - if (sock->listener) { - internal_accept(sock); /* unlocks sock */ - } else { - internal_recv(sock); - UNLOCK(&sock->lock); - } - } else { - UNLOCK(&sock->lock); - } - - UNLOCK(&thread->fdlock[lockid]); - - /* - * Socket destruction might be pending, it will resume - * after releasing fdlock and sock->lock. - */ -} - -/* - * process_fds is different for different event loops - * it takes the events from event loops and for each FD - * launches process_fd - */ -#ifdef USE_KQUEUE -static bool -process_fds(isc__socketthread_t *thread, struct kevent *events, int nevents) { - int i; - bool readable, writable; - bool done = false; - bool have_ctlevent = false; - if (nevents == thread->nevents) { - /* - * This is not an error, but something unexpected. If this - * happens, it may indicate the need for increasing - * ISC_SOCKET_MAXEVENTS. 
- */ - thread_log(thread, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, - "maximum number of FD events (%d) received", - nevents); - } - - for (i = 0; i < nevents; i++) { - REQUIRE(events[i].ident < thread->manager->maxsocks); - if (events[i].ident == (uintptr_t)thread->pipe_fds[0]) { - have_ctlevent = true; - continue; - } - readable = (events[i].filter == EVFILT_READ); - writable = (events[i].filter == EVFILT_WRITE); - process_fd(thread, events[i].ident, readable, writable); - } - - if (have_ctlevent) { - done = process_ctlfd(thread); - } - - return (done); -} -#elif defined(USE_EPOLL) -static bool -process_fds(isc__socketthread_t *thread, struct epoll_event *events, - int nevents) { - int i; - bool done = false; - bool have_ctlevent = false; - - if (nevents == thread->nevents) { - thread_log(thread, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, - "maximum number of FD events (%d) received", - nevents); - } - - for (i = 0; i < nevents; i++) { - REQUIRE(events[i].data.fd < (int)thread->manager->maxsocks); - if (events[i].data.fd == thread->pipe_fds[0]) { - have_ctlevent = true; - continue; - } - if ((events[i].events & EPOLLERR) != 0 || - (events[i].events & EPOLLHUP) != 0) { - /* - * epoll does not set IN/OUT bits on an erroneous - * condition, so we need to try both anyway. This is a - * bit inefficient, but should be okay for such rare - * events. Note also that the read or write attempt - * won't block because we use non-blocking sockets. 
- */ - int fd = events[i].data.fd; - events[i].events |= thread->epoll_events[fd]; - } - process_fd(thread, events[i].data.fd, - (events[i].events & EPOLLIN) != 0, - (events[i].events & EPOLLOUT) != 0); - } - - if (have_ctlevent) { - done = process_ctlfd(thread); - } - - return (done); -} -#elif defined(USE_DEVPOLL) -static bool -process_fds(isc__socketthread_t *thread, struct pollfd *events, int nevents) { - int i; - bool done = false; - bool have_ctlevent = false; - - if (nevents == thread->nevents) { - thread_log(thread, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, - "maximum number of FD events (%d) received", - nevents); - } - - for (i = 0; i < nevents; i++) { - REQUIRE(events[i].fd < (int)thread->manager->maxsocks); - if (events[i].fd == thread->pipe_fds[0]) { - have_ctlevent = true; - continue; - } - process_fd(thread, events[i].fd, - (events[i].events & POLLIN) != 0, - (events[i].events & POLLOUT) != 0); - } - - if (have_ctlevent) { - done = process_ctlfd(thread); - } - - return (done); -} -#elif defined(USE_SELECT) -static void -process_fds(isc__socketthread_t *thread, int maxfd, fd_set *readfds, - fd_set *writefds) { - int i; - - REQUIRE(maxfd <= (int)thread->manager->maxsocks); - - for (i = 0; i < maxfd; i++) { - if (i == thread->pipe_fds[0] || i == thread->pipe_fds[1]) { - continue; - } - process_fd(thread, i, FD_ISSET(i, readfds), - FD_ISSET(i, writefds)); - } -} -#endif /* ifdef USE_KQUEUE */ - -static bool -process_ctlfd(isc__socketthread_t *thread) { - int msg, fd; - - for (;;) { - select_readmsg(thread, &fd, &msg); - - thread_log(thread, IOEVENT, - "watcher got message %d for socket %d", msg, fd); - - /* - * Nothing to read? - */ - if (msg == SELECT_POKE_NOTHING) { - break; - } - - /* - * Handle shutdown message. We really should - * jump out of this loop right away, but - * it doesn't matter if we have to do a little - * more work first. 
- */ - if (msg == SELECT_POKE_SHUTDOWN) { - return (true); - } - - /* - * This is a wakeup on a socket. Look - * at the event queue for both read and write, - * and decide if we need to watch on it now - * or not. - */ - wakeup_socket(thread, fd, msg); - } - - return (false); -} - -/* - * This is the thread that will loop forever, always in a select or poll - * call. - * - * When select returns something to do, do whatever's necessary and post - * an event to the task that was requesting the action. - */ -static isc_threadresult_t -netthread(void *uap) { - isc__socketthread_t *thread = uap; - isc_socketmgr_t *manager = thread->manager; - (void)manager; - bool done; - int cc; -#ifdef USE_KQUEUE - const char *fnname = "kevent()"; -#elif defined(USE_EPOLL) - const char *fnname = "epoll_wait()"; -#elif defined(USE_DEVPOLL) - isc_result_t result; - const char *fnname = "ioctl(DP_POLL)"; - struct dvpoll dvp; - int pass; -#if defined(ISC_SOCKET_USE_POLLWATCH) - pollstate_t pollstate = poll_idle; -#endif /* if defined(ISC_SOCKET_USE_POLLWATCH) */ -#elif defined(USE_SELECT) - const char *fnname = "select()"; - int maxfd; - int ctlfd; -#endif /* ifdef USE_KQUEUE */ - char strbuf[ISC_STRERRORSIZE]; - -#if defined(USE_SELECT) - /* - * Get the control fd here. This will never change. - */ - ctlfd = thread->pipe_fds[0]; -#endif /* if defined(USE_SELECT) */ - done = false; - while (!done) { - do { -#ifdef USE_KQUEUE - cc = kevent(thread->kqueue_fd, NULL, 0, thread->events, - thread->nevents, NULL); -#elif defined(USE_EPOLL) - cc = epoll_wait(thread->epoll_fd, thread->events, - thread->nevents, -1); -#elif defined(USE_DEVPOLL) - /* - * Re-probe every thousand calls. 
- */ - if (thread->calls++ > 1000U) { - result = isc_resource_getcurlimit( - isc_resource_openfiles, - &thread->open_max); - if (result != ISC_R_SUCCESS) { - thread->open_max = 64; - } - thread->calls = 0; - } - for (pass = 0; pass < 2; pass++) { - dvp.dp_fds = thread->events; - dvp.dp_nfds = thread->nevents; - if (dvp.dp_nfds >= thread->open_max) { - dvp.dp_nfds = thread->open_max - 1; - } -#ifndef ISC_SOCKET_USE_POLLWATCH - dvp.dp_timeout = -1; -#else /* ifndef ISC_SOCKET_USE_POLLWATCH */ - if (pollstate == poll_idle) { - dvp.dp_timeout = -1; - } else { - dvp.dp_timeout = - ISC_SOCKET_POLLWATCH_TIMEOUT; - } -#endif /* ISC_SOCKET_USE_POLLWATCH */ - cc = ioctl(thread->devpoll_fd, DP_POLL, &dvp); - if (cc == -1 && errno == EINVAL) { - /* - * {OPEN_MAX} may have dropped. Look - * up the current value and try again. - */ - result = isc_resource_getcurlimit( - isc_resource_openfiles, - &thread->open_max); - if (result != ISC_R_SUCCESS) { - thread->open_max = 64; - } - } else { - break; - } - } -#elif defined(USE_SELECT) - /* - * We will have only one thread anyway, we can lock - * manager lock and don't care - */ - LOCK(&manager->lock); - memmove(thread->read_fds_copy, thread->read_fds, - thread->fd_bufsize); - memmove(thread->write_fds_copy, thread->write_fds, - thread->fd_bufsize); - maxfd = thread->maxfd + 1; - UNLOCK(&manager->lock); - - cc = select(maxfd, thread->read_fds_copy, - thread->write_fds_copy, NULL, NULL); -#endif /* USE_KQUEUE */ - - if (cc < 0 && !SOFT_ERROR(errno)) { - strerror_r(errno, strbuf, sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, "%s failed: %s", - fnname, strbuf); - } - -#if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH) - if (cc == 0) { - if (pollstate == poll_active) { - pollstate = poll_checking; - } else if (pollstate == poll_checking) { - pollstate = poll_idle; - } - } else if (cc > 0) { - if (pollstate == poll_checking) { - /* - * XXX: We'd like to use a more - * verbose log level as it's actually an - * unexpected 
event, but the kernel bug - * reportedly happens pretty frequently - * (and it can also be a false positive) - * so it would be just too noisy. - */ - thread_log(thread, - ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, - ISC_LOG_DEBUG(1), - "unexpected POLL timeout"); - } - pollstate = poll_active; - } -#endif /* if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH) */ - } while (cc < 0); - -#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) - done = process_fds(thread, thread->events, cc); -#elif defined(USE_SELECT) - process_fds(thread, maxfd, thread->read_fds_copy, - thread->write_fds_copy); - - /* - * Process reads on internal, control fd. - */ - if (FD_ISSET(ctlfd, thread->read_fds_copy)) { - done = process_ctlfd(thread); - } -#endif /* if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) \ - * */ - } - - thread_log(thread, TRACE, "watcher exiting"); - return ((isc_threadresult_t)0); -} - -void -isc_socketmgr_setreserved(isc_socketmgr_t *manager, uint32_t reserved) { - REQUIRE(VALID_MANAGER(manager)); - - manager->reserved = reserved; -} - -void -isc_socketmgr_maxudp(isc_socketmgr_t *manager, unsigned int maxudp) { - REQUIRE(VALID_MANAGER(manager)); - - manager->maxudp = maxudp; -} - -/* - * Setup socket thread, thread->manager and thread->threadid must be filled. 
- */ - -static isc_result_t -setup_thread(isc__socketthread_t *thread) { - isc_result_t result = ISC_R_SUCCESS; - int i; - char strbuf[ISC_STRERRORSIZE]; - - REQUIRE(thread != NULL); - REQUIRE(VALID_MANAGER(thread->manager)); - REQUIRE(thread->threadid >= 0 && - thread->threadid < thread->manager->nthreads); - - thread->fds = - isc_mem_get(thread->manager->mctx, - thread->manager->maxsocks * sizeof(isc_socket_t *)); - - memset(thread->fds, 0, - thread->manager->maxsocks * sizeof(isc_socket_t *)); - - thread->fdstate = isc_mem_get(thread->manager->mctx, - thread->manager->maxsocks * sizeof(int)); - - memset(thread->fdstate, 0, thread->manager->maxsocks * sizeof(int)); - - thread->fdlock = isc_mem_get(thread->manager->mctx, - FDLOCK_COUNT * sizeof(isc_mutex_t)); - - for (i = 0; i < FDLOCK_COUNT; i++) { - isc_mutex_init(&thread->fdlock[i]); - } - - if (pipe(thread->pipe_fds) != 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "pipe() failed: %s", - strbuf); - return (ISC_R_UNEXPECTED); - } - RUNTIME_CHECK(make_nonblock(thread->pipe_fds[0]) == ISC_R_SUCCESS); - -#ifdef USE_KQUEUE - thread->nevents = ISC_SOCKET_MAXEVENTS; - thread->events = isc_mem_get(thread->manager->mctx, - sizeof(struct kevent) * thread->nevents); - - thread->kqueue_fd = kqueue(); - if (thread->kqueue_fd == -1) { - result = isc__errno2result(errno); - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "kqueue failed: %s", - strbuf); - isc_mem_put(thread->manager->mctx, thread->events, - sizeof(struct kevent) * thread->nevents); - return (result); - } - - result = watch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - if (result != ISC_R_SUCCESS) { - close(thread->kqueue_fd); - isc_mem_put(thread->manager->mctx, thread->events, - sizeof(struct kevent) * thread->nevents); - } - return (result); - -#elif defined(USE_EPOLL) - thread->nevents = ISC_SOCKET_MAXEVENTS; - thread->epoll_events = - isc_mem_get(thread->manager->mctx, - 
(thread->manager->maxsocks * sizeof(uint32_t))); - - memset(thread->epoll_events, 0, - thread->manager->maxsocks * sizeof(uint32_t)); - - thread->events = - isc_mem_get(thread->manager->mctx, - sizeof(struct epoll_event) * thread->nevents); - - thread->epoll_fd = epoll_create(thread->nevents); - if (thread->epoll_fd == -1) { - result = isc__errno2result(errno); - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "epoll_create failed: %s", - strbuf); - return (result); - } - - result = watch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - return (result); - -#elif defined(USE_DEVPOLL) - thread->nevents = ISC_SOCKET_MAXEVENTS; - result = isc_resource_getcurlimit(isc_resource_openfiles, - &thread->open_max); - if (result != ISC_R_SUCCESS) { - thread->open_max = 64; - } - thread->calls = 0; - thread->events = isc_mem_get(thread->manager->mctx, - sizeof(struct pollfd) * thread->nevents); - - /* - * Note: fdpollinfo should be able to support all possible FDs, so - * it must have maxsocks entries (not nevents). 
- */ - thread->fdpollinfo = - isc_mem_get(thread->manager->mctx, - sizeof(pollinfo_t) * thread->manager->maxsocks); - memset(thread->fdpollinfo, 0, - sizeof(pollinfo_t) * thread->manager->maxsocks); - thread->devpoll_fd = open("/dev/poll", O_RDWR); - if (thread->devpoll_fd == -1) { - result = isc__errno2result(errno); - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "open(/dev/poll) failed: %s", strbuf); - isc_mem_put(thread->manager->mctx, thread->events, - sizeof(struct pollfd) * thread->nevents); - isc_mem_put(thread->manager->mctx, thread->fdpollinfo, - sizeof(pollinfo_t) * thread->manager->maxsocks); - return (result); - } - result = watch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - if (result != ISC_R_SUCCESS) { - close(thread->devpoll_fd); - isc_mem_put(thread->manager->mctx, thread->events, - sizeof(struct pollfd) * thread->nevents); - isc_mem_put(thread->manager->mctx, thread->fdpollinfo, - sizeof(pollinfo_t) * thread->manager->maxsocks); - return (result); - } - - return (ISC_R_SUCCESS); -#elif defined(USE_SELECT) - UNUSED(result); - -#if ISC_SOCKET_MAXSOCKETS > FD_SETSIZE - /* - * Note: this code should also cover the case of MAXSOCKETS <= - * FD_SETSIZE, but we separate the cases to avoid possible portability - * issues regarding howmany() and the actual representation of fd_set. 
- */ - thread->fd_bufsize = howmany(manager->maxsocks, NFDBITS) * - sizeof(fd_mask); -#else /* if ISC_SOCKET_MAXSOCKETS > FD_SETSIZE */ - thread->fd_bufsize = sizeof(fd_set); -#endif /* if ISC_SOCKET_MAXSOCKETS > FD_SETSIZE */ - - thread->read_fds = isc_mem_get(thread->manager->mctx, - thread->fd_bufsize); - thread->read_fds_copy = isc_mem_get(thread->manager->mctx, - thread->fd_bufsize); - thread->write_fds = isc_mem_get(thread->manager->mctx, - thread->fd_bufsize); - thread->write_fds_copy = isc_mem_get(thread->manager->mctx, - thread->fd_bufsize); - memset(thread->read_fds, 0, thread->fd_bufsize); - memset(thread->write_fds, 0, thread->fd_bufsize); - - (void)watch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - thread->maxfd = thread->pipe_fds[0]; - - return (ISC_R_SUCCESS); -#endif /* USE_KQUEUE */ -} - -static void -cleanup_thread(isc_mem_t *mctx, isc__socketthread_t *thread) { - isc_result_t result; - int i; - - result = unwatch_fd(thread, thread->pipe_fds[0], SELECT_POKE_READ); - if (result != ISC_R_SUCCESS) { - UNEXPECTED_ERROR(__FILE__, __LINE__, "epoll_ctl(DEL) failed"); - } -#ifdef USE_KQUEUE - close(thread->kqueue_fd); - isc_mem_put(mctx, thread->events, - sizeof(struct kevent) * thread->nevents); -#elif defined(USE_EPOLL) - close(thread->epoll_fd); - - isc_mem_put(mctx, thread->events, - sizeof(struct epoll_event) * thread->nevents); -#elif defined(USE_DEVPOLL) - close(thread->devpoll_fd); - isc_mem_put(mctx, thread->events, - sizeof(struct pollfd) * thread->nevents); - isc_mem_put(mctx, thread->fdpollinfo, - sizeof(pollinfo_t) * thread->manager->maxsocks); -#elif defined(USE_SELECT) - if (thread->read_fds != NULL) { - isc_mem_put(mctx, thread->read_fds, thread->fd_bufsize); - } - if (thread->read_fds_copy != NULL) { - isc_mem_put(mctx, thread->read_fds_copy, thread->fd_bufsize); - } - if (thread->write_fds != NULL) { - isc_mem_put(mctx, thread->write_fds, thread->fd_bufsize); - } - if (thread->write_fds_copy != NULL) { - isc_mem_put(mctx, 
thread->write_fds_copy, thread->fd_bufsize); - } -#endif /* USE_KQUEUE */ - for (i = 0; i < (int)thread->manager->maxsocks; i++) { - if (thread->fdstate[i] == CLOSE_PENDING) { - /* no need to lock */ - (void)close(i); - } - } - -#if defined(USE_EPOLL) - isc_mem_put(thread->manager->mctx, thread->epoll_events, - thread->manager->maxsocks * sizeof(uint32_t)); -#endif /* if defined(USE_EPOLL) */ - isc_mem_put(thread->manager->mctx, thread->fds, - thread->manager->maxsocks * sizeof(isc_socket_t *)); - isc_mem_put(thread->manager->mctx, thread->fdstate, - thread->manager->maxsocks * sizeof(int)); - - for (i = 0; i < FDLOCK_COUNT; i++) { - isc_mutex_destroy(&thread->fdlock[i]); - } - isc_mem_put(thread->manager->mctx, thread->fdlock, - FDLOCK_COUNT * sizeof(isc_mutex_t)); -} - -isc_result_t -isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp, - unsigned int maxsocks, int nthreads) { - int i; - isc_socketmgr_t *manager; - - REQUIRE(managerp != NULL && *managerp == NULL); - - if (maxsocks == 0) { - maxsocks = ISC_SOCKET_MAXSOCKETS; - } - - manager = isc_mem_get(mctx, sizeof(*manager)); - - /* zero-clear so that necessary cleanup on failure will be easy */ - memset(manager, 0, sizeof(*manager)); - manager->maxsocks = maxsocks; - manager->reserved = 0; - manager->maxudp = 0; - manager->nthreads = nthreads; - manager->stats = NULL; - - manager->magic = SOCKET_MANAGER_MAGIC; - manager->mctx = NULL; - ISC_LIST_INIT(manager->socklist); - isc_mutex_init(&manager->lock); - isc_condition_init(&manager->shutdown_ok); - - /* - * Start up the select/poll thread. 
- */ - manager->threads = isc_mem_get(mctx, sizeof(isc__socketthread_t) * - manager->nthreads); - isc_mem_attach(mctx, &manager->mctx); - - for (i = 0; i < manager->nthreads; i++) { - manager->threads[i].manager = manager; - manager->threads[i].threadid = i; - setup_thread(&manager->threads[i]); - isc_thread_create(netthread, &manager->threads[i], - &manager->threads[i].thread); - char tname[1024]; - sprintf(tname, "isc-socket-%d", i); - isc_thread_setname(manager->threads[i].thread, tname); - } - - *managerp = manager; - - return (ISC_R_SUCCESS); -} - -isc_result_t -isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) { - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(nsockp != NULL); - - *nsockp = manager->maxsocks; - - return (ISC_R_SUCCESS); -} - -void -isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) { - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(ISC_LIST_EMPTY(manager->socklist)); - REQUIRE(manager->stats == NULL); - REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max); - - isc_stats_attach(stats, &manager->stats); -} - -void -isc__socketmgr_destroy(isc_socketmgr_t **managerp) { - isc_socketmgr_t *manager; - - /* - * Destroy a socket manager. - */ - - REQUIRE(managerp != NULL); - manager = *managerp; - REQUIRE(VALID_MANAGER(manager)); - - LOCK(&manager->lock); - - /* - * Wait for all sockets to be destroyed. - */ - while (!ISC_LIST_EMPTY(manager->socklist)) { - manager_log(manager, CREATION, "sockets exist"); - WAIT(&manager->shutdown_ok, &manager->lock); - } - - UNLOCK(&manager->lock); - - /* - * Here, poke our select/poll thread. Do this by closing the write - * half of the pipe, which will send EOF to the read half. - * This is currently a no-op in the non-threaded case. - */ - for (int i = 0; i < manager->nthreads; i++) { - select_poke(manager, i, 0, SELECT_POKE_SHUTDOWN); - } - - /* - * Wait for thread to exit. 
- */ - for (int i = 0; i < manager->nthreads; i++) { - isc_thread_join(manager->threads[i].thread, NULL); - cleanup_thread(manager->mctx, &manager->threads[i]); - } - /* - * Clean up. - */ - isc_mem_put(manager->mctx, manager->threads, - sizeof(isc__socketthread_t) * manager->nthreads); - (void)isc_condition_destroy(&manager->shutdown_ok); - - if (manager->stats != NULL) { - isc_stats_detach(&manager->stats); - } - isc_mutex_destroy(&manager->lock); - manager->magic = 0; - isc_mem_putanddetach(&manager->mctx, manager, sizeof(*manager)); - - *managerp = NULL; -} - -static isc_result_t -socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, - unsigned int flags) { - int io_state; - bool have_lock = false; - isc_task_t *ntask = NULL; - isc_result_t result = ISC_R_SUCCESS; - - dev->ev_sender = task; - - if (sock->type == isc_sockettype_udp) { - io_state = doio_recv(sock, dev); - } else { - LOCK(&sock->lock); - have_lock = true; - - if (ISC_LIST_EMPTY(sock->recv_list)) { - io_state = doio_recv(sock, dev); - } else { - io_state = DOIO_SOFT; - } - } - - switch (io_state) { - case DOIO_SOFT: - /* - * We couldn't read all or part of the request right now, so - * queue it. - * - * Attach to socket and to task - */ - isc_task_attach(task, &ntask); - dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; - - if (!have_lock) { - LOCK(&sock->lock); - have_lock = true; - } - - /* - * Enqueue the request. If the socket was previously not being - * watched, poke the watcher to start paying attention to it. 
- */ - bool do_poke = ISC_LIST_EMPTY(sock->recv_list); - ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link); - if (do_poke) { - select_poke(sock->manager, sock->threadid, sock->fd, - SELECT_POKE_READ); - } - - socket_log(sock, NULL, EVENT, - "socket_recv: event %p -> task %p", dev, ntask); - - if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) { - result = ISC_R_INPROGRESS; - } - break; - - case DOIO_EOF: - dev->result = ISC_R_EOF; - /* fallthrough */ - - case DOIO_HARD: - case DOIO_SUCCESS: - if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0) { - send_recvdone_event(sock, &dev); - } - break; - } - - if (have_lock) { - UNLOCK(&sock->lock); - } - - return (result); -} - -isc_result_t -isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, - isc_task_t *task, isc_taskaction_t action, void *arg) { - isc_socketevent_t *dev; - isc_socketmgr_t *manager; - - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(action != NULL); - - manager = sock->manager; - REQUIRE(VALID_MANAGER(manager)); - - INSIST(sock->bound); - - dev = allocate_socketevent(manager->mctx, sock, ISC_SOCKEVENT_RECVDONE, - action, arg); - if (dev == NULL) { - return (ISC_R_NOMEMORY); - } - - return (isc_socket_recv2(sock, region, minimum, task, dev, 0)); -} - -isc_result_t -isc_socket_recv2(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, - isc_task_t *task, isc_socketevent_t *event, - unsigned int flags) { - event->ev_sender = sock; - event->result = ISC_R_UNSET; - event->region = *region; - event->n = 0; - event->offset = 0; - event->attributes = 0; - - /* - * UDP sockets are always partial read. 
- */ - if (sock->type == isc_sockettype_udp) { - event->minimum = 1; - } else { - if (minimum == 0) { - event->minimum = region->length; - } else { - event->minimum = minimum; - } - } - - return (socket_recv(sock, event, task, flags)); -} - -static isc_result_t -socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, - unsigned int flags) { - int io_state; - bool have_lock = false; - isc_task_t *ntask = NULL; - isc_result_t result = ISC_R_SUCCESS; - - dev->ev_sender = task; - - set_dev_address(address, sock, dev); - if (pktinfo != NULL) { - dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; - dev->pktinfo = *pktinfo; - - if (!isc_sockaddr_issitelocal(&dev->address) && - !isc_sockaddr_islinklocal(&dev->address)) - { - socket_log(sock, NULL, TRACE, - "pktinfo structure provided, ifindex %u " - "(set to 0)", - pktinfo->ipi6_ifindex); - - /* - * Set the pktinfo index to 0 here, to let the - * kernel decide what interface it should send on. - */ - dev->pktinfo.ipi6_ifindex = 0; - } - } - - if (sock->type == isc_sockettype_udp) { - io_state = doio_send(sock, dev); - } else { - LOCK(&sock->lock); - have_lock = true; - - if (ISC_LIST_EMPTY(sock->send_list)) { - io_state = doio_send(sock, dev); - } else { - io_state = DOIO_SOFT; - } - } - - switch (io_state) { - case DOIO_SOFT: - /* - * We couldn't send all or part of the request right now, so - * queue it unless ISC_SOCKFLAG_NORETRY is set. - */ - if ((flags & ISC_SOCKFLAG_NORETRY) == 0) { - isc_task_attach(task, &ntask); - dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; - - if (!have_lock) { - LOCK(&sock->lock); - have_lock = true; - } - - /* - * Enqueue the request. If the socket was previously - * not being watched, poke the watcher to start - * paying attention to it. 
- */ - bool do_poke = ISC_LIST_EMPTY(sock->send_list); - ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link); - if (do_poke) { - select_poke(sock->manager, sock->threadid, - sock->fd, SELECT_POKE_WRITE); - } - socket_log(sock, NULL, EVENT, - "socket_send: event %p -> task %p", dev, - ntask); - - if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) { - result = ISC_R_INPROGRESS; - } - break; - } - - /* FALLTHROUGH */ - - case DOIO_HARD: - case DOIO_SUCCESS: - if (!have_lock) { - LOCK(&sock->lock); - have_lock = true; - } - if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0) { - send_senddone_event(sock, &dev); - } - break; - } - - if (have_lock) { - UNLOCK(&sock->lock); - } - - return (result); -} - -isc_result_t -isc_socket_send(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - isc_taskaction_t action, void *arg) { - /* - * REQUIRE() checking is performed in isc_socket_sendto(). - */ - return (isc_socket_sendto(sock, region, task, action, arg, NULL, NULL)); -} - -isc_result_t -isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - isc_taskaction_t action, void *arg, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) { - isc_socketevent_t *dev; - isc_socketmgr_t *manager; - - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(region != NULL); - REQUIRE(task != NULL); - REQUIRE(action != NULL); - - manager = sock->manager; - REQUIRE(VALID_MANAGER(manager)); - - INSIST(sock->bound); - - dev = allocate_socketevent(manager->mctx, sock, ISC_SOCKEVENT_SENDDONE, - action, arg); - if (dev == NULL) { - return (ISC_R_NOMEMORY); - } - - dev->region = *region; - - return (socket_send(sock, dev, task, address, pktinfo, 0)); -} - -isc_result_t -isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, - const isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, - isc_socketevent_t *event, unsigned int flags) { - REQUIRE(VALID_SOCKET(sock)); - REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE | ISC_SOCKFLAG_NORETRY)) == - 0); - if ((flags & 
ISC_SOCKFLAG_NORETRY) != 0) { - REQUIRE(sock->type == isc_sockettype_udp); - } - event->ev_sender = sock; - event->result = ISC_R_UNSET; - event->region = *region; - event->n = 0; - event->offset = 0; - event->attributes &= ~ISC_SOCKEVENTATTR_ATTACHED; - - return (socket_send(sock, event, task, address, pktinfo, flags)); -} - -void -isc_socket_cleanunix(const isc_sockaddr_t *sockaddr, bool active) { - int s; - struct stat sb; - char strbuf[ISC_STRERRORSIZE]; - - if (sockaddr->type.sa.sa_family != AF_UNIX) { - return; - } - -#ifndef S_ISSOCK -#if defined(S_IFMT) && defined(S_IFSOCK) -#define S_ISSOCK(mode) ((mode & S_IFMT) == S_IFSOCK) -#elif defined(_S_IFMT) && defined(S_IFSOCK) -#define S_ISSOCK(mode) ((mode & _S_IFMT) == S_IFSOCK) -#endif /* if defined(S_IFMT) && defined(S_IFSOCK) */ -#endif /* ifndef S_ISSOCK */ - -#ifndef S_ISFIFO -#if defined(S_IFMT) && defined(S_IFIFO) -#define S_ISFIFO(mode) ((mode & S_IFMT) == S_IFIFO) -#elif defined(_S_IFMT) && defined(S_IFIFO) -#define S_ISFIFO(mode) ((mode & _S_IFMT) == S_IFIFO) -#endif /* if defined(S_IFMT) && defined(S_IFIFO) */ -#endif /* ifndef S_ISFIFO */ - -#if !defined(S_ISFIFO) && !defined(S_ISSOCK) -/* cppcheck-suppress preprocessorErrorDirective */ -#error \ - You need to define S_ISFIFO and S_ISSOCK as appropriate for your platform. See . -#endif /* if !defined(S_ISFIFO) && !defined(S_ISSOCK) */ - -#ifndef S_ISFIFO -#define S_ISFIFO(mode) 0 -#endif /* ifndef S_ISFIFO */ - -#ifndef S_ISSOCK -#define S_ISSOCK(mode) 0 -#endif /* ifndef S_ISSOCK */ - - if (stat(sockaddr->type.sunix.sun_path, &sb) < 0) { - switch (errno) { - case ENOENT: - if (active) { /* We exited cleanly last time */ - break; - } - /* FALLTHROUGH */ - default: - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, - active ? 
ISC_LOG_ERROR : ISC_LOG_WARNING, - "isc_socket_cleanunix: stat(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - return; - } - } else { - if (!(S_ISSOCK(sb.st_mode) || S_ISFIFO(sb.st_mode))) { - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, - active ? ISC_LOG_ERROR : ISC_LOG_WARNING, - "isc_socket_cleanunix: %s: not a socket", - sockaddr->type.sunix.sun_path); - return; - } - } - - if (active) { - if (unlink(sockaddr->type.sunix.sun_path) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "isc_socket_cleanunix: unlink(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - } - return; - } - - s = socket(AF_UNIX, SOCK_STREAM, 0); - if (s < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, - "isc_socket_cleanunix: socket(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - return; - } - - if (connect(s, (const struct sockaddr *)&sockaddr->type.sunix, - sizeof(sockaddr->type.sunix)) < 0) - { - switch (errno) { - case ECONNREFUSED: - case ECONNRESET: - if (unlink(sockaddr->type.sunix.sun_path) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write( - isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, - "isc_socket_cleanunix: " - "unlink(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - } - break; - default: - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, - "isc_socket_cleanunix: connect(%s): %s", - sockaddr->type.sunix.sun_path, strbuf); - break; - } - } - close(s); -} - -isc_result_t -isc_socket_permunix(const isc_sockaddr_t *sockaddr, uint32_t perm, - uint32_t owner, uint32_t group) { - isc_result_t result = ISC_R_SUCCESS; - char strbuf[ISC_STRERRORSIZE]; - char path[sizeof(sockaddr->type.sunix.sun_path)]; -#ifdef 
NEED_SECURE_DIRECTORY - char *slash; -#endif /* ifdef NEED_SECURE_DIRECTORY */ - - REQUIRE(sockaddr->type.sa.sa_family == AF_UNIX); - INSIST(strlen(sockaddr->type.sunix.sun_path) < sizeof(path)); - strlcpy(path, sockaddr->type.sunix.sun_path, sizeof(path)); - -#ifdef NEED_SECURE_DIRECTORY - slash = strrchr(path, '/'); - if (slash != NULL) { - if (slash != path) { - *slash = '\0'; - } else { - strlcpy(path, "/", sizeof(path)); - } - } else { - strlcpy(path, ".", sizeof(path)); - } -#endif /* ifdef NEED_SECURE_DIRECTORY */ - - if (chmod(path, perm) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "isc_socket_permunix: chmod(%s, %d): %s", path, - perm, strbuf); - result = ISC_R_FAILURE; - } - if (chown(path, owner, group) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, - "isc_socket_permunix: chown(%s, %d, %d): %s", - path, owner, group, strbuf); - result = ISC_R_FAILURE; - } - return (result); -} - -isc_result_t -isc_socket_bind(isc_socket_t *sock, const isc_sockaddr_t *sockaddr, - isc_socket_options_t options) { - char strbuf[ISC_STRERRORSIZE]; - int on = 1; - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - - INSIST(!sock->bound); - - if (sock->pf != sockaddr->type.sa.sa_family) { - UNLOCK(&sock->lock); - return (ISC_R_FAMILYMISMATCH); - } - - /* - * Only set SO_REUSEADDR when we want a specific port. 
- */ -#ifdef AF_UNIX - if (sock->pf == AF_UNIX) { - goto bind_socket; - } -#endif /* ifdef AF_UNIX */ - if ((options & ISC_SOCKET_REUSEADDRESS) != 0 && - isc_sockaddr_getport(sockaddr) != (in_port_t)0) - { - if (setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, - sizeof(on)) < 0) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d) failed", sock->fd); - } -#if defined(__FreeBSD_kernel__) && defined(SO_REUSEPORT_LB) - if (setsockopt(sock->fd, SOL_SOCKET, SO_REUSEPORT_LB, - (void *)&on, sizeof(on)) < 0) - { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d) failed", sock->fd); - } -#elif defined(__linux__) && defined(SO_REUSEPORT) - if (setsockopt(sock->fd, SOL_SOCKET, SO_REUSEPORT, (void *)&on, - sizeof(on)) < 0) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d) failed", sock->fd); - } -#endif /* if defined(__FreeBSD_kernel__) && defined(SO_REUSEPORT_LB) */ - /* Press on... */ - } -#ifdef AF_UNIX -bind_socket: -#endif /* ifdef AF_UNIX */ - if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) { - inc_stats(sock->manager->stats, - sock->statsindex[STATID_BINDFAIL]); - - UNLOCK(&sock->lock); - switch (errno) { - case EACCES: - return (ISC_R_NOPERM); - case EADDRNOTAVAIL: - return (ISC_R_ADDRNOTAVAIL); - case EADDRINUSE: - return (ISC_R_ADDRINUSE); - case EINVAL: - return (ISC_R_BOUND); - default: - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s", - strbuf); - return (ISC_R_UNEXPECTED); - } - } - - socket_log(sock, sockaddr, TRACE, "bound"); - sock->bound = 1; - - UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); -} - -/* - * Enable this only for specific OS versions, and only when they have repaired - * their problems with it. Until then, this is is broken and needs to be - * disabled by default. See RT22589 for details. 
- */ -#undef ENABLE_ACCEPTFILTER - -isc_result_t -isc_socket_filter(isc_socket_t *sock, const char *filter) { -#if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) - char strbuf[ISC_STRERRORSIZE]; - struct accept_filter_arg afa; -#else /* if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) */ - UNUSED(sock); - UNUSED(filter); -#endif /* if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) */ - - REQUIRE(VALID_SOCKET(sock)); - -#if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) - bzero(&afa, sizeof(afa)); - strlcpy(afa.af_name, filter, sizeof(afa.af_name)); - if (setsockopt(sock->fd, SOL_SOCKET, SO_ACCEPTFILTER, &afa, - sizeof(afa)) == -1) { - strerror_r(errno, strbuf, sizeof(strbuf)); - socket_log(sock, NULL, CREATION, - "setsockopt(SO_ACCEPTFILTER): %s", strbuf); - return (ISC_R_FAILURE); - } - return (ISC_R_SUCCESS); -#else /* if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) */ - return (ISC_R_NOTIMPLEMENTED); -#endif /* if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) */ -} - -/* - * Try enabling TCP Fast Open for a given socket if the OS supports it. - */ -static void -set_tcp_fastopen(isc_socket_t *sock, unsigned int backlog) { -#if defined(ENABLE_TCP_FASTOPEN) && defined(TCP_FASTOPEN) - char strbuf[ISC_STRERRORSIZE]; - -/* - * FreeBSD, as of versions 10.3 and 11.0, defines TCP_FASTOPEN while also - * shipping a default kernel without TFO support, so we special-case it by - * performing an additional runtime check for TFO support using sysctl to - * prevent setsockopt() errors from being logged. - */ -#if defined(__FreeBSD__) && defined(HAVE_SYSCTLBYNAME) -#define SYSCTL_TFO "net.inet.tcp.fastopen.enabled" - unsigned int enabled; - size_t enabledlen = sizeof(enabled); - static bool tfo_notice_logged = false; - - if (sysctlbyname(SYSCTL_TFO, &enabled, &enabledlen, NULL, 0) < 0) { - /* - * This kernel does not support TCP Fast Open. There is - * nothing more we can do. 
- */ - return; - } else if (enabled == 0) { - /* - * This kernel does support TCP Fast Open, but it is disabled - * by sysctl. Notify the user, but do not nag. - */ - if (!tfo_notice_logged) { - isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, - ISC_LOGMODULE_SOCKET, ISC_LOG_NOTICE, - "TCP_FASTOPEN support is disabled by " - "sysctl (" SYSCTL_TFO " = 0)"); - tfo_notice_logged = true; - } - return; - } -#endif /* if defined(__FreeBSD__) && defined(HAVE_SYSCTLBYNAME) */ - -#ifdef __APPLE__ - backlog = 1; -#else /* ifdef __APPLE__ */ - backlog = backlog / 2; - if (backlog == 0) { - backlog = 1; - } -#endif /* ifdef __APPLE__ */ - if (setsockopt(sock->fd, IPPROTO_TCP, TCP_FASTOPEN, (void *)&backlog, - sizeof(backlog)) < 0) - { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, TCP_FASTOPEN) failed with %s", - sock->fd, strbuf); - /* TCP_FASTOPEN is experimental so ignore failures */ - } -#else /* if defined(ENABLE_TCP_FASTOPEN) && defined(TCP_FASTOPEN) */ - UNUSED(sock); - UNUSED(backlog); -#endif /* if defined(ENABLE_TCP_FASTOPEN) && defined(TCP_FASTOPEN) */ -} - -/* - * Set up to listen on a given socket. We do this by creating an internal - * event that will be dispatched when the socket has read activity. The - * watcher will send the internal event to the task when there is a new - * connection. - * - * Unlike in read, we don't preallocate a done event here. Every time there - * is a new connection we'll have to allocate a new one anyway, so we might - * as well keep things simple rather than having to track them. 
- */ -isc_result_t -isc_socket_listen(isc_socket_t *sock, unsigned int backlog) { - char strbuf[ISC_STRERRORSIZE]; - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - - REQUIRE(!sock->listener); - REQUIRE(sock->bound); - REQUIRE(sock->type == isc_sockettype_tcp || - sock->type == isc_sockettype_unix); - - if (backlog == 0) { - backlog = SOMAXCONN; - } - - if (listen(sock->fd, (int)backlog) < 0) { - UNLOCK(&sock->lock); - strerror_r(errno, strbuf, sizeof(strbuf)); - - UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf); - - return (ISC_R_UNEXPECTED); - } - - set_tcp_fastopen(sock, backlog); - - sock->listener = 1; - - UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); -} - -/* - * This should try to do aggressive accept() XXXMLG - */ -isc_result_t -isc_socket_accept(isc_socket_t *sock, isc_task_t *task, isc_taskaction_t action, - void *arg) { - isc_socket_newconnev_t *dev; - isc_socketmgr_t *manager; - isc_task_t *ntask = NULL; - isc_socket_t *nsock; - isc_result_t result; - bool do_poke = false; - - REQUIRE(VALID_SOCKET(sock)); - manager = sock->manager; - REQUIRE(VALID_MANAGER(manager)); - - LOCK(&sock->lock); - - REQUIRE(sock->listener); - - /* - * Sender field is overloaded here with the task we will be sending - * this event to. Just before the actual event is delivered the - * actual ev_sender will be touched up to be the socket. - */ - dev = (isc_socket_newconnev_t *)isc_event_allocate( - manager->mctx, task, ISC_SOCKEVENT_NEWCONN, action, arg, - sizeof(*dev)); - ISC_LINK_INIT(dev, ev_link); - - result = allocate_socket(manager, sock->type, &nsock); - if (result != ISC_R_SUCCESS) { - isc_event_free(ISC_EVENT_PTR(&dev)); - UNLOCK(&sock->lock); - return (result); - } - - /* - * Attach to socket and to task. 
- */ - isc_task_attach(task, &ntask); - if (isc_task_exiting(ntask)) { - free_socket(&nsock); - isc_task_detach(&ntask); - isc_event_free(ISC_EVENT_PTR(&dev)); - UNLOCK(&sock->lock); - return (ISC_R_SHUTTINGDOWN); - } - isc_refcount_increment0(&nsock->references); - nsock->statsindex = sock->statsindex; - - dev->ev_sender = ntask; - dev->newsocket = nsock; - - /* - * Poke watcher here. We still have the socket locked, so there - * is no race condition. We will keep the lock for such a short - * bit of time waking it up now or later won't matter all that much. - */ - do_poke = ISC_LIST_EMPTY(sock->accept_list); - ISC_LIST_ENQUEUE(sock->accept_list, dev, ev_link); - if (do_poke) { - select_poke(manager, sock->threadid, sock->fd, - SELECT_POKE_ACCEPT); - } - UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); -} - -isc_result_t -isc_socket_connect(isc_socket_t *sock, const isc_sockaddr_t *addr, - isc_task_t *task, isc_taskaction_t action, void *arg) { - isc_socket_connev_t *dev; - isc_task_t *ntask = NULL; - isc_socketmgr_t *manager; - int cc; - char strbuf[ISC_STRERRORSIZE]; - char addrbuf[ISC_SOCKADDR_FORMATSIZE]; - - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(addr != NULL); - REQUIRE(task != NULL); - REQUIRE(action != NULL); - - manager = sock->manager; - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(addr != NULL); - - if (isc_sockaddr_ismulticast(addr)) { - return (ISC_R_MULTICAST); - } - - LOCK(&sock->lock); - - dev = (isc_socket_connev_t *)isc_event_allocate( - manager->mctx, sock, ISC_SOCKEVENT_CONNECT, action, arg, - sizeof(*dev)); - ISC_LINK_INIT(dev, ev_link); - - if (sock->connecting) { - INSIST(isc_sockaddr_equal(&sock->peer_address, addr)); - goto queue; - } - - if (sock->connected) { - INSIST(isc_sockaddr_equal(&sock->peer_address, addr)); - dev->result = ISC_R_SUCCESS; - isc_task_sendto(task, ISC_EVENT_PTR(&dev), sock->threadid); - - UNLOCK(&sock->lock); - - return (ISC_R_SUCCESS); - } - - /* - * Try to do the connect right away, as there can be only one - * 
outstanding, and it might happen to complete. - */ - sock->peer_address = *addr; - cc = connect(sock->fd, &addr->type.sa, addr->length); - if (cc < 0) { - /* - * The socket is nonblocking and the connection cannot be - * completed immediately. It is possible to select(2) or - * poll(2) for completion by selecting the socket for writing. - * After select(2) indicates writability, use getsockopt(2) to - * read the SO_ERROR option at level SOL_SOCKET to determine - * whether connect() completed successfully (SO_ERROR is zero) - * or unsuccessfully (SO_ERROR is one of the usual error codes - * listed here, explaining the reason for the failure). - */ - if (sock->type == isc_sockettype_udp && errno == EINPROGRESS) { - cc = 0; - goto success; - } - if (SOFT_ERROR(errno) || errno == EINPROGRESS) { - goto queue; - } - - switch (errno) { -#define ERROR_MATCH(a, b) \ - case a: \ - dev->result = b; \ - goto err_exit; - ERROR_MATCH(EACCES, ISC_R_NOPERM); - ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED); - ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH); -#ifdef EHOSTDOWN - ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH); -#endif /* ifdef EHOSTDOWN */ - ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH); - ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES); - ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); - ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); - ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT); - ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET); -#undef ERROR_MATCH - } - - sock->connected = 0; - - strerror_r(errno, strbuf, sizeof(strbuf)); - isc_sockaddr_format(addr, addrbuf, sizeof(addrbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "connect(%s) %d/%s", - addrbuf, errno, strbuf); - - UNLOCK(&sock->lock); - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECTFAIL]); - isc_event_free(ISC_EVENT_PTR(&dev)); - return (ISC_R_UNEXPECTED); - - err_exit: - sock->connected = 0; - isc_task_sendto(task, ISC_EVENT_PTR(&dev), 
sock->threadid); - - UNLOCK(&sock->lock); - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECTFAIL]); - return (ISC_R_SUCCESS); - } - - /* - * If connect completed, fire off the done event. - */ -success: - if (cc == 0) { - sock->connected = 1; - sock->bound = 1; - dev->result = ISC_R_SUCCESS; - isc_task_sendto(task, ISC_EVENT_PTR(&dev), sock->threadid); - - UNLOCK(&sock->lock); - - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECT]); - - return (ISC_R_SUCCESS); - } - -queue: - - /* - * Attach to task. - */ - isc_task_attach(task, &ntask); - - dev->ev_sender = ntask; - - /* - * Poke watcher here. We still have the socket locked, so there - * is no race condition. We will keep the lock for such a short - * bit of time waking it up now or later won't matter all that much. - */ - bool do_poke = ISC_LIST_EMPTY(sock->connect_list); - ISC_LIST_ENQUEUE(sock->connect_list, dev, ev_link); - if (do_poke && !sock->connecting) { - sock->connecting = 1; - select_poke(manager, sock->threadid, sock->fd, - SELECT_POKE_CONNECT); - } - - UNLOCK(&sock->lock); - return (ISC_R_SUCCESS); -} - -/* - * Called when a socket with a pending connect() finishes. - */ -static void -internal_connect(isc_socket_t *sock) { - isc_socket_connev_t *dev; - int cc; - isc_result_t result; - socklen_t optlen; - char strbuf[ISC_STRERRORSIZE]; - char peerbuf[ISC_SOCKADDR_FORMATSIZE]; - - INSIST(VALID_SOCKET(sock)); - REQUIRE(sock->fd >= 0); - - /* - * Get the first item off the connect list. - * If it is empty, unlock the socket and return. - */ - dev = ISC_LIST_HEAD(sock->connect_list); - if (dev == NULL) { - INSIST(!sock->connecting); - goto finish; - } - - INSIST(sock->connecting); - sock->connecting = 0; - - /* - * Get any possible error status here. 
- */ - optlen = sizeof(cc); - if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR, (void *)&cc, - (void *)&optlen) != 0) - { - cc = errno; - } else { - errno = cc; - } - - if (errno != 0) { - /* - * If the error is EAGAIN, just re-select on this - * fd and pretend nothing strange happened. - */ - if (SOFT_ERROR(errno) || errno == EINPROGRESS) { - sock->connecting = 1; - return; - } - - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECTFAIL]); - - /* - * Translate other errors into ISC_R_* flavors. - */ - switch (errno) { -#define ERROR_MATCH(a, b) \ - case a: \ - result = b; \ - break; - ERROR_MATCH(EACCES, ISC_R_NOPERM); - ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); - ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED); - ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH); -#ifdef EHOSTDOWN - ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH); -#endif /* ifdef EHOSTDOWN */ - ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH); - ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES); - ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); - ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); - ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT); - ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET); -#undef ERROR_MATCH - default: - result = ISC_R_UNEXPECTED; - isc_sockaddr_format(&sock->peer_address, peerbuf, - sizeof(peerbuf)); - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "internal_connect: connect(%s) %s", - peerbuf, strbuf); - } - } else { - inc_stats(sock->manager->stats, - sock->statsindex[STATID_CONNECT]); - result = ISC_R_SUCCESS; - sock->connected = 1; - sock->bound = 1; - } - - do { - dev->result = result; - send_connectdone_event(sock, &dev); - dev = ISC_LIST_HEAD(sock->connect_list); - } while (dev != NULL); - -finish: - unwatch_fd(&sock->manager->threads[sock->threadid], sock->fd, - SELECT_POKE_CONNECT); -} - -isc_result_t -isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) { - isc_result_t result; - - 
REQUIRE(VALID_SOCKET(sock)); - REQUIRE(addressp != NULL); - - LOCK(&sock->lock); - - if (sock->connected) { - *addressp = sock->peer_address; - result = ISC_R_SUCCESS; - } else { - result = ISC_R_NOTCONNECTED; - } - - UNLOCK(&sock->lock); - - return (result); -} - -isc_result_t -isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) { - socklen_t len; - isc_result_t result; - char strbuf[ISC_STRERRORSIZE]; - - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(addressp != NULL); - - LOCK(&sock->lock); - - if (!sock->bound) { - result = ISC_R_NOTBOUND; - goto out; - } - - result = ISC_R_SUCCESS; - - len = sizeof(addressp->type); - if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) { - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s", strbuf); - result = ISC_R_UNEXPECTED; - goto out; - } - addressp->length = (unsigned int)len; - -out: - UNLOCK(&sock->lock); - - return (result); -} - -/* - * Run through the list of events on this socket, and cancel the ones - * queued for task "task" of type "how". "how" is a bitmask. - */ -void -isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { - REQUIRE(VALID_SOCKET(sock)); - - /* - * Quick exit if there is nothing to do. Don't even bother locking - * in this case. - */ - if (how == 0) { - return; - } - - LOCK(&sock->lock); - - /* - * All of these do the same thing, more or less. - * Each will: - * o If the internal event is marked as "posted" try to - * remove it from the task's queue. If this fails, mark it - * as canceled instead, and let the task clean it up later. - * o For each I/O request for that task of that type, post - * its done event with status of "ISC_R_CANCELED". - * o Reset any state needed. 
- */ - if (((how & ISC_SOCKCANCEL_RECV) != 0) && - !ISC_LIST_EMPTY(sock->recv_list)) { - isc_socketevent_t *dev; - isc_socketevent_t *next; - isc_task_t *current_task; - - dev = ISC_LIST_HEAD(sock->recv_list); - - while (dev != NULL) { - current_task = dev->ev_sender; - next = ISC_LIST_NEXT(dev, ev_link); - - if ((task == NULL) || (task == current_task)) { - dev->result = ISC_R_CANCELED; - send_recvdone_event(sock, &dev); - } - dev = next; - } - } - - if (((how & ISC_SOCKCANCEL_SEND) != 0) && - !ISC_LIST_EMPTY(sock->send_list)) { - isc_socketevent_t *dev; - isc_socketevent_t *next; - isc_task_t *current_task; - - dev = ISC_LIST_HEAD(sock->send_list); - - while (dev != NULL) { - current_task = dev->ev_sender; - next = ISC_LIST_NEXT(dev, ev_link); - - if ((task == NULL) || (task == current_task)) { - dev->result = ISC_R_CANCELED; - send_senddone_event(sock, &dev); - } - dev = next; - } - } - - if (((how & ISC_SOCKCANCEL_ACCEPT) != 0) && - !ISC_LIST_EMPTY(sock->accept_list)) { - isc_socket_newconnev_t *dev; - isc_socket_newconnev_t *next; - isc_task_t *current_task; - - dev = ISC_LIST_HEAD(sock->accept_list); - while (dev != NULL) { - current_task = dev->ev_sender; - next = ISC_LIST_NEXT(dev, ev_link); - - if ((task == NULL) || (task == current_task)) { - ISC_LIST_UNLINK(sock->accept_list, dev, - ev_link); - - isc_refcount_decrementz( - &NEWCONNSOCK(dev)->references); - free_socket((isc_socket_t **)&dev->newsocket); - - dev->result = ISC_R_CANCELED; - dev->ev_sender = sock; - isc_task_sendtoanddetach(¤t_task, - ISC_EVENT_PTR(&dev), - sock->threadid); - } - - dev = next; - } - } - - if (((how & ISC_SOCKCANCEL_CONNECT) != 0) && - !ISC_LIST_EMPTY(sock->connect_list)) - { - isc_socket_connev_t *dev; - isc_socket_connev_t *next; - isc_task_t *current_task; - - INSIST(sock->connecting); - sock->connecting = 0; - - dev = ISC_LIST_HEAD(sock->connect_list); - - while (dev != NULL) { - current_task = dev->ev_sender; - next = ISC_LIST_NEXT(dev, ev_link); - - if ((task == NULL) 
|| (task == current_task)) { - dev->result = ISC_R_CANCELED; - send_connectdone_event(sock, &dev); - } - dev = next; - } - } - - UNLOCK(&sock->lock); -} - -isc_sockettype_t -isc_socket_gettype(isc_socket_t *sock) { - REQUIRE(VALID_SOCKET(sock)); - - return (sock->type); -} - -void -isc_socket_ipv6only(isc_socket_t *sock, bool yes) { -#if defined(IPV6_V6ONLY) - int onoff = yes ? 1 : 0; -#else /* if defined(IPV6_V6ONLY) */ - UNUSED(yes); - UNUSED(sock); -#endif /* if defined(IPV6_V6ONLY) */ - - REQUIRE(VALID_SOCKET(sock)); - -#ifdef IPV6_V6ONLY - if (sock->pf == AF_INET6) { - if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, - (void *)&onoff, sizeof(int)) < 0) - { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_V6ONLY) failed: " - "%s", - sock->fd, strbuf); - } - } -#endif /* ifdef IPV6_V6ONLY */ -} - -static void -setdscp(isc_socket_t *sock, isc_dscp_t dscp) { -#if defined(IP_TOS) || defined(IPV6_TCLASS) - int value = dscp << 2; -#endif /* if defined(IP_TOS) || defined(IPV6_TCLASS) */ - - sock->dscp = dscp; - -#ifdef IP_TOS - if (sock->pf == AF_INET) { - if (setsockopt(sock->fd, IPPROTO_IP, IP_TOS, (void *)&value, - sizeof(value)) < 0) { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IP_TOS, %.02x) " - "failed: %s", - sock->fd, value >> 2, strbuf); - } - } -#endif /* ifdef IP_TOS */ -#ifdef IPV6_TCLASS - if (sock->pf == AF_INET6) { - if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS, - (void *)&value, sizeof(value)) < 0) - { - char strbuf[ISC_STRERRORSIZE]; - strerror_r(errno, strbuf, sizeof(strbuf)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "setsockopt(%d, IPV6_TCLASS, %.02x) " - "failed: %s", - sock->fd, dscp >> 2, strbuf); - } - } -#endif /* ifdef IPV6_TCLASS */ -} - -void -isc_socket_dscp(isc_socket_t *sock, isc_dscp_t dscp) { - REQUIRE(VALID_SOCKET(sock)); - REQUIRE(dscp < 0x40); - 
-#if !defined(IP_TOS) && !defined(IPV6_TCLASS) - UNUSED(dscp); -#else /* if !defined(IP_TOS) && !defined(IPV6_TCLASS) */ - if (dscp < 0) { - return; - } - - /* The DSCP value must not be changed once it has been set. */ - if (isc_dscp_check_value != -1) { - INSIST(dscp == isc_dscp_check_value); - } -#endif /* if !defined(IP_TOS) && !defined(IPV6_TCLASS) */ - - setdscp(sock, dscp); -} - -isc_socketevent_t * -isc_socket_socketevent(isc_mem_t *mctx, void *sender, isc_eventtype_t eventtype, - isc_taskaction_t action, void *arg) { - return (allocate_socketevent(mctx, sender, eventtype, action, arg)); -} - -void -isc_socket_setname(isc_socket_t *sock, const char *name, void *tag) { - /* - * Name 'sock'. - */ - - REQUIRE(VALID_SOCKET(sock)); - - LOCK(&sock->lock); - strlcpy(sock->name, name, sizeof(sock->name)); - sock->tag = tag; - UNLOCK(&sock->lock); -} - -const char * -isc_socket_getname(isc_socket_t *sock) { - return (sock->name); -} - -void * -isc_socket_gettag(isc_socket_t *sock) { - return (sock->tag); -} - -int -isc_socket_getfd(isc_socket_t *sock) { - return ((short)sock->fd); -} - -static isc_once_t hasreuseport_once = ISC_ONCE_INIT; -static bool hasreuseport = false; - -static void -init_hasreuseport(void) { -/* - * SO_REUSEPORT works very differently on *BSD and on Linux (because why not). - * We only want to use it on Linux, if it's available. 
- */ -#if (defined(SO_REUSEPORT) && defined(__linux__)) || \ - (defined(SO_REUSEPORT_LB) && defined(__FreeBSD_kernel__)) - int sock, yes = 1; - sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock < 0) { - sock = socket(AF_INET6, SOCK_DGRAM, 0); - if (sock < 0) { - return; - } - } - if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (void *)&yes, - sizeof(yes)) < 0) { - close(sock); - return; -#if defined(__FreeBSD_kernel__) - } else if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT_LB, (void *)&yes, - sizeof(yes)) < 0) -#else /* if defined(__FreeBSD_kernel__) */ - } else if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (void *)&yes, - sizeof(yes)) < 0) -#endif /* if defined(__FreeBSD_kernel__) */ - { - close(sock); - return; - } - hasreuseport = true; - close(sock); -#endif /* if (defined(SO_REUSEPORT) && defined(__linux__)) || \ - * (defined(SO_REUSEPORT_LB) && defined(__FreeBSD_kernel__)) */ -} - -bool -isc_socket_hasreuseport(void) { - RUNTIME_CHECK(isc_once_do(&hasreuseport_once, init_hasreuseport) == - ISC_R_SUCCESS); - return (hasreuseport); -} - -#if defined(HAVE_LIBXML2) || defined(HAVE_JSON_C) -static const char * -_socktype(isc_sockettype_t type) { - switch (type) { - case isc_sockettype_udp: - return ("udp"); - case isc_sockettype_tcp: - return ("tcp"); - case isc_sockettype_unix: - return ("unix"); - default: - return ("not-initialized"); - } -} -#endif /* if defined(HAVE_LIBXML2) || defined(HAVE_JSON_C) */ - -#ifdef HAVE_LIBXML2 -#define TRY0(a) \ - do { \ - xmlrc = (a); \ - if (xmlrc < 0) \ - goto error; \ - } while (0) -int -isc_socketmgr_renderxml(isc_socketmgr_t *mgr, void *writer0) { - isc_socket_t *sock = NULL; - char peerbuf[ISC_SOCKADDR_FORMATSIZE]; - isc_sockaddr_t addr; - socklen_t len; - int xmlrc; - xmlTextWriterPtr writer = (xmlTextWriterPtr)writer0; - - LOCK(&mgr->lock); - - TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets")); - sock = ISC_LIST_HEAD(mgr->socklist); - while (sock != NULL) { - LOCK(&sock->lock); - 
TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket")); - - TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "id")); - TRY0(xmlTextWriterWriteFormatString(writer, "%p", sock)); - TRY0(xmlTextWriterEndElement(writer)); - - if (sock->name[0] != 0) { - TRY0(xmlTextWriterStartElement(writer, - ISC_XMLCHAR "name")); - TRY0(xmlTextWriterWriteFormatString(writer, "%s", - sock->name)); - TRY0(xmlTextWriterEndElement(writer)); /* name */ - } - - TRY0(xmlTextWriterStartElement(writer, - ISC_XMLCHAR "references")); - TRY0(xmlTextWriterWriteFormatString( - writer, "%d", - (int)isc_refcount_current(&sock->references))); - TRY0(xmlTextWriterEndElement(writer)); - - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "type", - ISC_XMLCHAR _socktype(sock->type))); - - if (sock->connected) { - isc_sockaddr_format(&sock->peer_address, peerbuf, - sizeof(peerbuf)); - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "peer-address", - ISC_XMLCHAR peerbuf)); - } - - len = sizeof(addr); - if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) { - isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf)); - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "local-address", - ISC_XMLCHAR peerbuf)); - } - - TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "states")); - if (sock->listener) { - TRY0(xmlTextWriterWriteElement(writer, - ISC_XMLCHAR "state", - ISC_XMLCHAR "listener")); - } - if (sock->connected) { - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "state", - ISC_XMLCHAR "connected")); - } - if (sock->connecting) { - TRY0(xmlTextWriterWriteElement( - writer, ISC_XMLCHAR "state", - ISC_XMLCHAR "connecting")); - } - if (sock->bound) { - TRY0(xmlTextWriterWriteElement(writer, - ISC_XMLCHAR "state", - ISC_XMLCHAR "bound")); - } - - TRY0(xmlTextWriterEndElement(writer)); /* states */ - - TRY0(xmlTextWriterEndElement(writer)); /* socket */ - - UNLOCK(&sock->lock); - sock = ISC_LIST_NEXT(sock, link); - } - TRY0(xmlTextWriterEndElement(writer)); /* sockets */ - 
-error: - if (sock != NULL) { - UNLOCK(&sock->lock); - } - - UNLOCK(&mgr->lock); - - return (xmlrc); -} -#endif /* HAVE_LIBXML2 */ - -#ifdef HAVE_JSON_C -#define CHECKMEM(m) \ - do { \ - if (m == NULL) { \ - result = ISC_R_NOMEMORY; \ - goto error; \ - } \ - } while (0) - -isc_result_t -isc_socketmgr_renderjson(isc_socketmgr_t *mgr, void *stats0) { - isc_result_t result = ISC_R_SUCCESS; - isc_socket_t *sock = NULL; - char peerbuf[ISC_SOCKADDR_FORMATSIZE]; - isc_sockaddr_t addr; - socklen_t len; - json_object *obj, *array = json_object_new_array(); - json_object *stats = (json_object *)stats0; - - CHECKMEM(array); - - LOCK(&mgr->lock); - - sock = ISC_LIST_HEAD(mgr->socklist); - while (sock != NULL) { - json_object *states, *entry = json_object_new_object(); - char buf[255]; - - CHECKMEM(entry); - json_object_array_add(array, entry); - - LOCK(&sock->lock); - - snprintf(buf, sizeof(buf), "%p", sock); - obj = json_object_new_string(buf); - CHECKMEM(obj); - json_object_object_add(entry, "id", obj); - - if (sock->name[0] != 0) { - obj = json_object_new_string(sock->name); - CHECKMEM(obj); - json_object_object_add(entry, "name", obj); - } - - obj = json_object_new_int( - (int)isc_refcount_current(&sock->references)); - CHECKMEM(obj); - json_object_object_add(entry, "references", obj); - - obj = json_object_new_string(_socktype(sock->type)); - CHECKMEM(obj); - json_object_object_add(entry, "type", obj); - - if (sock->connected) { - isc_sockaddr_format(&sock->peer_address, peerbuf, - sizeof(peerbuf)); - obj = json_object_new_string(peerbuf); - CHECKMEM(obj); - json_object_object_add(entry, "peer-address", obj); - } - - len = sizeof(addr); - if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) { - isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf)); - obj = json_object_new_string(peerbuf); - CHECKMEM(obj); - json_object_object_add(entry, "local-address", obj); - } - - states = json_object_new_array(); - CHECKMEM(states); - json_object_object_add(entry, "states", 
states); - - if (sock->listener) { - obj = json_object_new_string("listener"); - CHECKMEM(obj); - json_object_array_add(states, obj); - } - - if (sock->connected) { - obj = json_object_new_string("connected"); - CHECKMEM(obj); - json_object_array_add(states, obj); - } - - if (sock->connecting) { - obj = json_object_new_string("connecting"); - CHECKMEM(obj); - json_object_array_add(states, obj); - } - - if (sock->bound) { - obj = json_object_new_string("bound"); - CHECKMEM(obj); - json_object_array_add(states, obj); - } - - UNLOCK(&sock->lock); - sock = ISC_LIST_NEXT(sock, link); - } - - json_object_object_add(stats, "sockets", array); - array = NULL; - result = ISC_R_SUCCESS; - -error: - if (array != NULL) { - json_object_put(array); - } - - if (sock != NULL) { - UNLOCK(&sock->lock); - } - - UNLOCK(&mgr->lock); - - return (result); -} -#endif /* HAVE_JSON_C */ diff --git a/lib/isc/socket_p.h b/lib/isc/socket_p.h deleted file mode 100644 index c99d246d50..0000000000 --- a/lib/isc/socket_p.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (C) Internet Systems Consortium, Inc. ("ISC") - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, you can obtain one at https://mozilla.org/MPL/2.0/. - * - * See the COPYRIGHT file distributed with this work for additional - * information regarding copyright ownership. - */ - -#pragma once - -#include -#include -#include - -isc_result_t -isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp, - unsigned int maxsocks, int nthreads); -/*%< - * Create a socket manager. If "maxsocks" is non-zero, it specifies the - * maximum number of sockets that the created manager should handle. - * - * Notes: - * - *\li All memory will be allocated in memory context 'mctx'. - * - * Requires: - * - *\li 'mctx' is a valid memory context. - * - *\li 'managerp' points to a NULL isc_socketmgr_t. 
- * - * Ensures: - * - *\li '*managerp' is a valid isc_socketmgr_t. - * - * Returns: - * - *\li #ISC_R_SUCCESS - *\li #ISC_R_NOMEMORY - *\li #ISC_R_UNEXPECTED - *\li #ISC_R_NOTIMPLEMENTED - */ - -void -isc__socketmgr_destroy(isc_socketmgr_t **managerp); -/*%< - * Destroy a socket manager. - * - * Notes: - * - *\li This routine blocks until there are no sockets left in the manager, - * so if the caller holds any socket references using the manager, it - * must detach them before calling isc_socketmgr_destroy() or it will - * block forever. - * - * Requires: - * - *\li '*managerp' is a valid isc_socketmgr_t. - * - *\li All sockets managed by this manager are fully detached. - * - * Ensures: - * - *\li *managerp == NULL - * - *\li All resources used by the manager have been freed. - */ - -#include - -typedef struct isc_socketwait isc_socketwait_t; -int -isc__socketmgr_waitevents(isc_socketmgr_t *, struct timeval *, - isc_socketwait_t **); -isc_result_t -isc__socketmgr_dispatch(isc_socketmgr_t *, isc_socketwait_t *); diff --git a/lib/isc/tests/Makefile.am b/lib/isc/tests/Makefile.am index de488951ab..d84e167e1b 100644 --- a/lib/isc/tests/Makefile.am +++ b/lib/isc/tests/Makefile.am @@ -9,7 +9,6 @@ LDADD += \ check_LTLIBRARIES = libisctest.la libisctest_la_SOURCES = \ - ../socket_p.h \ isctest.c \ isctest.h \ uv_wrap.h @@ -40,7 +39,6 @@ check_PROGRAMS = \ safe_test \ siphash_test \ sockaddr_test \ - socket_test \ stats_test \ symtab_test \ task_test \ diff --git a/lib/isc/tests/isctest.c b/lib/isc/tests/isctest.c index 6975501ef7..af8fa5183f 100644 --- a/lib/isc/tests/isctest.c +++ b/lib/isc/tests/isctest.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -33,7 +32,6 @@ isc_mem_t *test_mctx = NULL; isc_log_t *test_lctx = NULL; isc_taskmgr_t *taskmgr = NULL; isc_timermgr_t *timermgr = NULL; -isc_socketmgr_t *socketmgr = NULL; isc_nm_t *netmgr = NULL; isc_task_t *maintask = NULL; int ncpus; @@ -61,8 +59,7 @@ cleanup_managers(void) { } 
isc_managers_destroy(netmgr == NULL ? NULL : &netmgr, taskmgr == NULL ? NULL : &taskmgr, - timermgr == NULL ? NULL : &timermgr, - socketmgr == NULL ? NULL : &socketmgr); + timermgr == NULL ? NULL : &timermgr); } static isc_result_t @@ -80,8 +77,8 @@ create_managers(unsigned int workers) { INSIST(workers != 0); isc_hp_init(6 * workers); - isc_managers_create(test_mctx, workers, 0, 0, &netmgr, &taskmgr, - &timermgr, &socketmgr); + isc_managers_create(test_mctx, workers, 0, &netmgr, &taskmgr, + &timermgr); CHECK(isc_task_create_bound(taskmgr, 0, &maintask, 0)); isc_taskmgr_setexcltask(taskmgr, maintask); diff --git a/lib/isc/tests/isctest.h b/lib/isc/tests/isctest.h index 74196f5861..f773559b2d 100644 --- a/lib/isc/tests/isctest.h +++ b/lib/isc/tests/isctest.h @@ -39,7 +39,6 @@ extern isc_mem_t *test_mctx; extern isc_log_t *test_lctx; extern isc_taskmgr_t *taskmgr; extern isc_timermgr_t *timermgr; -extern isc_socketmgr_t *socketmgr; extern isc_nm_t *netmgr; extern int ncpus; diff --git a/lib/isc/tests/socket_test.c b/lib/isc/tests/socket_test.c deleted file mode 100644 index a98fc11906..0000000000 --- a/lib/isc/tests/socket_test.c +++ /dev/null @@ -1,734 +0,0 @@ -/* - * Copyright (C) Internet Systems Consortium, Inc. ("ISC") - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, you can obtain one at https://mozilla.org/MPL/2.0/. - * - * See the COPYRIGHT file distributed with this work for additional - * information regarding copyright ownership. - */ - -/*! 
\file */ - -#if HAVE_CMOCKA -#include -#include /* IWYU pragma: keep */ -#include -#include -#include -#include -#include -#include -#include - -#define UNIT_TESTING -#include - -#include -#include -#include -#include - -#include "../socket_p.h" -#include "isctest.h" - -static bool recv_dscp; -static unsigned int recv_dscp_value; -static bool recv_trunc; -isc_socket_t *s1 = NULL, *s2 = NULL, *s3 = NULL; -isc_task_t *test_task = NULL; - -/* - * Helper functions - */ - -static int -_setup(void **state) { - isc_result_t result; - - UNUSED(state); - - result = isc_test_begin(NULL, true, 0); - assert_int_equal(result, ISC_R_SUCCESS); - - return (0); -} - -static int -_teardown(void **state) { - UNUSED(state); - - if (s1 != NULL) { - isc_socket_detach(&s1); - } - if (s2 != NULL) { - isc_socket_detach(&s2); - } - if (s3 != NULL) { - isc_socket_detach(&s3); - } - if (test_task != NULL) { - isc_task_detach(&test_task); - } - - isc_test_end(); - - return (0); -} - -typedef struct { - atomic_bool done; - atomic_uintptr_t socket; - isc_result_t result; -} completion_t; - -static void -completion_init(completion_t *completion) { - atomic_init(&completion->done, false); - atomic_init(&completion->socket, (uintptr_t)NULL); -} - -static void -accept_done(isc_task_t *task, isc_event_t *event) { - isc_socket_newconnev_t *nevent = (isc_socket_newconnev_t *)event; - completion_t *completion = event->ev_arg; - - UNUSED(task); - - completion->result = nevent->result; - atomic_store(&completion->done, true); - if (completion->result == ISC_R_SUCCESS) { - atomic_store(&completion->socket, (uintptr_t)nevent->newsocket); - } - - isc_event_free(&event); -} - -static void -event_done(isc_task_t *task, isc_event_t *event) { - isc_socketevent_t *sev = NULL; - isc_socket_connev_t *connev = NULL; - completion_t *completion = event->ev_arg; - UNUSED(task); - - switch (event->ev_type) { - case ISC_SOCKEVENT_RECVDONE: - case ISC_SOCKEVENT_SENDDONE: - sev = (isc_socketevent_t *)event; - 
completion->result = sev->result; - if ((sev->attributes & ISC_SOCKEVENTATTR_DSCP) != 0) { - recv_dscp = true; - recv_dscp_value = sev->dscp; - } else { - recv_dscp = false; - } - recv_trunc = ((sev->attributes & ISC_SOCKEVENTATTR_TRUNC) != 0); - break; - case ISC_SOCKEVENT_CONNECT: - connev = (isc_socket_connev_t *)event; - completion->result = connev->result; - break; - default: - assert_false(true); - } - atomic_store(&completion->done, true); - isc_event_free(&event); -} - -static void -waitfor(completion_t *completion) { - int i = 0; - while (!atomic_load(&completion->done) && i++ < 5000) { - isc_test_nap(10000); - } - assert_true(atomic_load(&completion->done)); -} - -static void -waitfor2(completion_t *c1, completion_t *c2) { - int i = 0; - - while (!(atomic_load(&c1->done) && atomic_load(&c2->done)) && - i++ < 5000) { - isc_test_nap(10000); - } - assert_true(atomic_load(&c1->done) && atomic_load(&c2->done)); -} - -/* - * Individual unit tests - */ - -/* Test UDP sendto/recv (IPv4) */ -static void -udp_sendto_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1, addr2; - struct in_addr in; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion; - isc_region_t r; - - UNUSED(state); - - in.s_addr = inet_addr("127.0.0.1"); - isc_sockaddr_fromin(&addr1, &in, 0); - isc_sockaddr_fromin(&addr2, &in, 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s1); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s1, &addr1, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s2); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s2, &addr2, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s2, &addr2); - assert_int_equal(result, ISC_R_SUCCESS); - 
assert_true(isc_sockaddr_getport(&addr2) != 0); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - completion_init(&completion); - result = isc_socket_sendto(s1, &r, test_task, event_done, &completion, - &addr2, NULL); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); -} - -/* Test UDP sendto/recv (IPv4) */ -static void -udp_dscp_v4_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1, addr2; - struct in_addr in; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion; - isc_region_t r; - isc_socketevent_t *socketevent; - - UNUSED(state); - - in.s_addr = inet_addr("127.0.0.1"); - isc_sockaddr_fromin(&addr1, &in, 0); - isc_sockaddr_fromin(&addr2, &in, 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s1); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s1, &addr1, ISC_SOCKET_REUSEADDRESS); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s2); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s2, &addr2, ISC_SOCKET_REUSEADDRESS); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s2, &addr2); - 
assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr2) != 0); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - completion_init(&completion); - - socketevent = isc_socket_socketevent( - test_mctx, s1, ISC_SOCKEVENT_SENDDONE, event_done, &completion); - assert_non_null(socketevent); - - if ((isc_net_probedscp() & ISC_NET_DSCPPKTV4) != 0) { - socketevent->dscp = 056; /* EF */ - socketevent->attributes |= ISC_SOCKEVENTATTR_DSCP; - } else if ((isc_net_probedscp() & ISC_NET_DSCPSETV4) != 0) { - isc_socket_dscp(s1, 056); /* EF */ - socketevent->dscp = 0; - socketevent->attributes &= ~ISC_SOCKEVENTATTR_DSCP; - } - - recv_dscp = false; - recv_dscp_value = 0; - - result = isc_socket_sendto2(s1, &r, test_task, &addr2, NULL, - socketevent, 0); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - - if ((isc_net_probedscp() & ISC_NET_DSCPRECVV4) != 0) { - assert_true(recv_dscp); - assert_int_equal(recv_dscp_value, 056); - } else { - assert_false(recv_dscp); - } -} - -/* Test UDP sendto/recv (IPv6) */ -static void -udp_dscp_v6_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1, addr2; - struct in6_addr in6; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion; - isc_region_t r; - isc_socketevent_t *socketevent; - int n; - - UNUSED(state); - - n = inet_pton(AF_INET6, "::1", &in6.s6_addr); - 
assert_true(n == 1); - isc_sockaddr_fromin6(&addr1, &in6, 0); - isc_sockaddr_fromin6(&addr2, &in6, 0); - - result = isc_socket_create(socketmgr, PF_INET6, isc_sockettype_udp, - &s1); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s1, &addr1, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_create(socketmgr, PF_INET6, isc_sockettype_udp, - &s2); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s2, &addr2, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s2, &addr2); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr2) != 0); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - completion_init(&completion); - - socketevent = isc_socket_socketevent( - test_mctx, s1, ISC_SOCKEVENT_SENDDONE, event_done, &completion); - assert_non_null(socketevent); - - if ((isc_net_probedscp() & ISC_NET_DSCPPKTV6) != 0) { - socketevent->dscp = 056; /* EF */ - socketevent->attributes = ISC_SOCKEVENTATTR_DSCP; - } else if ((isc_net_probedscp() & ISC_NET_DSCPSETV6) != 0) { - isc_socket_dscp(s1, 056); /* EF */ - } - - recv_dscp = false; - recv_dscp_value = 0; - - result = isc_socket_sendto2(s1, &r, test_task, &addr2, NULL, - socketevent, 0); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - 
assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - if ((isc_net_probedscp() & ISC_NET_DSCPRECVV6) != 0) { - assert_true(recv_dscp); - assert_int_equal(recv_dscp_value, 056); - } else { - assert_false(recv_dscp); - } -} - -/* Test TCP sendto/recv (IPv4) */ -static void -tcp_dscp_v4_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1; - struct in_addr in; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion, completion2; - isc_region_t r; - - UNUSED(state); - - in.s_addr = inet_addr("127.0.0.1"); - isc_sockaddr_fromin(&addr1, &in, 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_tcp, &s1); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_socket_bind(s1, &addr1, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_listen(s1, 3); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_tcp, &s2); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - completion_init(&completion2); - result = isc_socket_accept(s1, test_task, accept_done, &completion2); - assert_int_equal(result, ISC_R_SUCCESS); - - completion_init(&completion); - result = isc_socket_connect(s2, &addr1, test_task, event_done, - &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor2(&completion, &completion2); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_true(atomic_load(&completion2.done)); - assert_int_equal(completion2.result, ISC_R_SUCCESS); - s3 = (isc_socket_t *)atomic_load(&completion2.socket); - - isc_socket_dscp(s2, 056); /* EF */ - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); 
- r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - recv_dscp = false; - recv_dscp_value = 0; - - completion_init(&completion); - result = isc_socket_sendto(s2, &r, test_task, event_done, &completion, - NULL, NULL); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s3, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - - if ((isc_net_probedscp() & ISC_NET_DSCPRECVV4) != 0) { - if (recv_dscp) { - assert_int_equal(recv_dscp_value, 056); - } - } else { - assert_false(recv_dscp); - } -} - -/* Test TCP sendto/recv (IPv6) */ -static void -tcp_dscp_v6_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1; - struct in6_addr in6; - char sendbuf[BUFSIZ], recvbuf[BUFSIZ]; - completion_t completion, completion2; - isc_region_t r; - int n; - - UNUSED(state); - - n = inet_pton(AF_INET6, "::1", &in6.s6_addr); - assert_true(n == 1); - isc_sockaddr_fromin6(&addr1, &in6, 0); - - result = isc_socket_create(socketmgr, PF_INET6, isc_sockettype_tcp, - &s1); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_socket_bind(s1, &addr1, 0); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - - result = isc_socket_listen(s1, 3); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_socket_create(socketmgr, PF_INET6, isc_sockettype_tcp, - &s2); - assert_int_equal(result, ISC_R_SUCCESS); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - 
completion_init(&completion2); - result = isc_socket_accept(s1, test_task, accept_done, &completion2); - assert_int_equal(result, ISC_R_SUCCESS); - - completion_init(&completion); - result = isc_socket_connect(s2, &addr1, test_task, event_done, - &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor2(&completion, &completion2); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_true(atomic_load(&completion2.done)); - assert_int_equal(completion2.result, ISC_R_SUCCESS); - s3 = (isc_socket_t *)atomic_load(&completion2.socket); - - isc_socket_dscp(s2, 056); /* EF */ - - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - recv_dscp = false; - recv_dscp_value = 0; - - completion_init(&completion); - result = isc_socket_sendto(s2, &r, test_task, event_done, &completion, - NULL, NULL); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - result = isc_socket_recv(s3, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - - if ((isc_net_probedscp() & ISC_NET_DSCPRECVV6) != 0) { - /* - * IPV6_RECVTCLASS is undefined for TCP however - * if we do get it it should be the value we set. - */ - if (recv_dscp) { - assert_int_equal(recv_dscp_value, 056); - } - } else { - assert_false(recv_dscp); - } -} - -/* probe dscp capabilities */ -static void -net_probedscp_test(void **state) { - unsigned int n; - - UNUSED(state); - - n = isc_net_probedscp(); - assert_true((n & ~ISC_NET_DSCPALL) == 0); - - /* ISC_NET_DSCPSETV4 MUST be set if any is set. 
*/ - if (n & (ISC_NET_DSCPPKTV4 | ISC_NET_DSCPRECVV4)) { - assert_true((n & ISC_NET_DSCPSETV4) != 0); - } - - /* ISC_NET_DSCPSETV6 MUST be set if any is set. */ - if (n & (ISC_NET_DSCPPKTV6 | ISC_NET_DSCPRECVV6)) { - assert_true((n & ISC_NET_DSCPSETV6) != 0); - } - -#if 0 - fprintf(stdout,"IPv4:%s%s%s\n", - (n & ISC_NET_DSCPSETV4) ? " set" : "none", - (n & ISC_NET_DSCPPKTV4) ? " packet" : "", - (n & ISC_NET_DSCPRECVV4) ? " receive" : ""); - - fprintf(stdout,"IPv6:%s%s%s\n", - (n & ISC_NET_DSCPSETV6) ? " set" : "none", - (n & ISC_NET_DSCPPKTV6) ? " packet" : "", - (n & ISC_NET_DSCPRECVV6) ? " receive" : ""); -#endif /* if 0 */ -} - -/* Test UDP truncation detection */ -static void -udp_trunc_test(void **state) { - isc_result_t result; - isc_sockaddr_t addr1, addr2; - struct in_addr in; - char sendbuf[BUFSIZ * 2], recvbuf[BUFSIZ]; - completion_t completion; - isc_region_t r; - isc_socketevent_t *socketevent; - - UNUSED(state); - - in.s_addr = inet_addr("127.0.0.1"); - isc_sockaddr_fromin(&addr1, &in, 0); - isc_sockaddr_fromin(&addr2, &in, 0); - - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s1); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s1, &addr1, ISC_SOCKET_REUSEADDRESS); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s1, &addr1); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr1) != 0); - result = isc_socket_create(socketmgr, PF_INET, isc_sockettype_udp, &s2); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_bind(s2, &addr2, ISC_SOCKET_REUSEADDRESS); - assert_int_equal(result, ISC_R_SUCCESS); - result = isc_socket_getsockname(s2, &addr2); - assert_int_equal(result, ISC_R_SUCCESS); - assert_true(isc_sockaddr_getport(&addr2) != 0); - - result = isc_task_create(taskmgr, 0, &test_task); - assert_int_equal(result, ISC_R_SUCCESS); - - /* - * Send a message that will not be truncated. 
- */ - memset(sendbuf, 0xff, sizeof(sendbuf)); - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = strlen(sendbuf) + 1; - - completion_init(&completion); - - socketevent = isc_socket_socketevent( - test_mctx, s1, ISC_SOCKEVENT_SENDDONE, event_done, &completion); - assert_non_null(socketevent); - - result = isc_socket_sendto2(s1, &r, test_task, &addr2, NULL, - socketevent, 0); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - recv_trunc = false; - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - assert_false(recv_trunc); - - /* - * Send a message that will be truncated. 
- */ - memset(sendbuf, 0xff, sizeof(sendbuf)); - snprintf(sendbuf, sizeof(sendbuf), "Hello"); - r.base = (void *)sendbuf; - r.length = sizeof(sendbuf); - - completion_init(&completion); - - socketevent = isc_socket_socketevent( - test_mctx, s1, ISC_SOCKEVENT_SENDDONE, event_done, &completion); - assert_non_null(socketevent); - - result = isc_socket_sendto2(s1, &r, test_task, &addr2, NULL, - socketevent, 0); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - - r.base = (void *)recvbuf; - r.length = BUFSIZ; - completion_init(&completion); - recv_trunc = false; - result = isc_socket_recv(s2, &r, 1, test_task, event_done, &completion); - assert_int_equal(result, ISC_R_SUCCESS); - waitfor(&completion); - assert_true(atomic_load(&completion.done)); - assert_int_equal(completion.result, ISC_R_SUCCESS); - assert_string_equal(recvbuf, "Hello"); - assert_true(recv_trunc); -} - -/* - * Main - */ -int -main(void) { - const struct CMUnitTest tests[] = { - cmocka_unit_test_setup_teardown(udp_sendto_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(tcp_dscp_v4_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(tcp_dscp_v6_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(udp_dscp_v4_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(udp_dscp_v6_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(net_probedscp_test, _setup, - _teardown), - cmocka_unit_test_setup_teardown(udp_trunc_test, _setup, - _teardown), - }; - - return (cmocka_run_group_tests(tests, NULL, NULL)); -} - -#else /* HAVE_CMOCKA */ - -#include - -int -main(void) { - printf("1..0 # Skipped: cmocka not available\n"); - return (SKIPPED_TEST_EXIT_CODE); -} - -#endif /* if HAVE_CMOCKA */ diff --git a/lib/isc/tests/task_test.c b/lib/isc/tests/task_test.c index 2be810d179..1b6d75d47d 100644 --- a/lib/isc/tests/task_test.c +++ 
b/lib/isc/tests/task_test.c @@ -721,7 +721,7 @@ manytasks(void **state) { isc_mem_debugging = ISC_MEM_DEBUGRECORD; isc_mem_create(&mctx); - isc_managers_create(mctx, 4, 0, 0, &netmgr, &taskmgr, NULL, NULL); + isc_managers_create(mctx, 4, 0, &netmgr, &taskmgr, NULL); atomic_init(&done, false); @@ -736,7 +736,7 @@ manytasks(void **state) { } UNLOCK(&lock); - isc_managers_destroy(&netmgr, &taskmgr, NULL, NULL); + isc_managers_destroy(&netmgr, &taskmgr, NULL); isc_mem_destroy(&mctx); isc_condition_destroy(&cv); diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index fcbd929106..4067adf093 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -1271,7 +1271,7 @@ static cfg_clausedef_t options_clauses[] = { { "random-device", &cfg_type_qstringornone, 0 }, { "recursing-file", &cfg_type_qstring, 0 }, { "recursive-clients", &cfg_type_uint32, 0 }, - { "reserved-sockets", &cfg_type_uint32, 0 }, + { "reserved-sockets", &cfg_type_uint32, CFG_CLAUSEFLAG_DEPRECATED }, { "secroots-file", &cfg_type_qstring, 0 }, { "serial-queries", NULL, CFG_CLAUSEFLAG_ANCIENT }, { "serial-query-rate", &cfg_type_uint32, 0 }, diff --git a/lib/ns/include/ns/events.h b/lib/ns/include/ns/events.h index b8da178b05..4c5b7bba68 100644 --- a/lib/ns/include/ns/events.h +++ b/lib/ns/include/ns/events.h @@ -20,3 +20,4 @@ #define NS_EVENT_CLIENTCONTROL (ISC_EVENTCLASS_NS + 0) #define NS_EVENT_HOOKASYNCDONE (ISC_EVENTCLASS_NS + 1) +#define NS_EVENT_IFSCAN (ISC_EVENTCLASS_NS + 2) diff --git a/lib/ns/include/ns/interfacemgr.h b/lib/ns/include/ns/interfacemgr.h index db16ccd70d..c52392156a 100644 --- a/lib/ns/include/ns/interfacemgr.h +++ b/lib/ns/include/ns/interfacemgr.h @@ -97,10 +97,9 @@ struct ns_interface { isc_result_t ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, isc_timermgr_t *timermgr, - isc_socketmgr_t *socketmgr, isc_nm_t *nm, - dns_dispatchmgr_t *dispatchmgr, isc_task_t *task, - dns_geoip_databases_t *geoip, int ncpus, - ns_interfacemgr_t 
**mgrp); + isc_nm_t *nm, dns_dispatchmgr_t *dispatchmgr, + isc_task_t *task, dns_geoip_databases_t *geoip, + int ncpus, bool scan, ns_interfacemgr_t **mgrp); /*%< * Create a new interface manager. * diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c index 9673902b61..316d0142f4 100644 --- a/lib/ns/interfacemgr.c +++ b/lib/ns/interfacemgr.c @@ -33,10 +33,8 @@ #ifdef HAVE_NET_ROUTE_H #include #if defined(RTM_VERSION) && defined(RTM_NEWADDR) && defined(RTM_DELADDR) -#define USE_ROUTE_SOCKET 1 -#define ROUTE_SOCKET_PROTOCOL PF_ROUTE -#define MSGHDR rt_msghdr -#define MSGTYPE rtm_type +#define MSGHDR rt_msghdr +#define MSGTYPE rtm_type #endif /* if defined(RTM_VERSION) && defined(RTM_NEWADDR) && \ * defined(RTM_DELADDR) */ #endif /* ifdef HAVE_NET_ROUTE_H */ @@ -45,10 +43,8 @@ #include #include #if defined(RTM_NEWADDR) && defined(RTM_DELADDR) -#define USE_ROUTE_SOCKET 1 -#define ROUTE_SOCKET_PROTOCOL PF_NETLINK -#define MSGHDR nlmsghdr -#define MSGTYPE nlmsg_type +#define MSGHDR nlmsghdr +#define MSGTYPE nlmsg_type #endif /* if defined(RTM_NEWADDR) && defined(RTM_DELADDR) */ #endif /* if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) \ */ @@ -70,14 +66,13 @@ struct ns_interfacemgr { unsigned int magic; /*%< Magic number */ isc_refcount_t references; isc_mutex_t lock; - isc_mem_t *mctx; /*%< Memory context */ - ns_server_t *sctx; /*%< Server context */ - isc_taskmgr_t *taskmgr; /*%< Task manager */ - isc_task_t *excl; /*%< Exclusive task */ - isc_timermgr_t *timermgr; /*%< Timer manager */ - isc_socketmgr_t *socketmgr; /*%< Socket manager */ - isc_nm_t *nm; /*%< Net manager */ - int ncpus; /*%< Number of workers */ + isc_mem_t *mctx; /*%< Memory context */ + ns_server_t *sctx; /*%< Server context */ + isc_taskmgr_t *taskmgr; /*%< Task manager */ + isc_task_t *excl; /*%< Exclusive task */ + isc_timermgr_t *timermgr; /*%< Timer manager */ + isc_nm_t *nm; /*%< Net manager */ + int ncpus; /*%< Number of workers */ dns_dispatchmgr_t *dispatchmgr; 
unsigned int generation; /*%< Current generation no */ ns_listenlist_t *listenon4; @@ -88,11 +83,7 @@ struct ns_interfacemgr { int backlog; /*%< Listen queue size */ atomic_bool shuttingdown; /*%< Interfacemgr shutting down */ ns_clientmgr_t **clientmgrs; /*%< Client managers */ -#ifdef USE_ROUTE_SOCKET - isc_task_t *task; - isc_socket_t *route; - unsigned char buf[2048]; -#endif /* ifdef USE_ROUTE_SOCKET */ + isc_nmhandle_t *route; }; static void @@ -101,35 +92,42 @@ purge_old_interfaces(ns_interfacemgr_t *mgr); static void clearlistenon(ns_interfacemgr_t *mgr); -#ifdef USE_ROUTE_SOCKET static void -route_event(isc_task_t *task, isc_event_t *event) { - isc_socketevent_t *sevent = NULL; - ns_interfacemgr_t *mgr = NULL; - isc_region_t r; - isc_result_t result; - struct MSGHDR *rtm; - bool done = true; +scan_event(isc_task_t *task, isc_event_t *event) { + ns_interfacemgr_t *mgr = (ns_interfacemgr_t *)event->ev_arg; UNUSED(task); - REQUIRE(event->ev_type == ISC_SOCKEVENT_RECVDONE); - mgr = event->ev_arg; - sevent = (isc_socketevent_t *)event; + ns_interfacemgr_scan(mgr, false); + isc_event_free(&event); +} - if (sevent->result != ISC_R_SUCCESS) { - if (sevent->result != ISC_R_CANCELED) { - isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, - "automatic interface scanning " - "terminated: %s", - isc_result_totext(sevent->result)); - } - ns_interfacemgr_detach(&mgr); - isc_event_free(&event); +static void +route_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, + void *arg) { + ns_interfacemgr_t *mgr = (ns_interfacemgr_t *)arg; + struct MSGHDR *rtm = NULL; + bool done = true; + + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_DEBUG(3), "route_recv: %s", + isc_result_totext(eresult)); + + if (handle == NULL) { return; } - rtm = (struct MSGHDR *)mgr->buf; + if (eresult != ISC_R_SUCCESS) { + if (eresult != ISC_R_CANCELED) { + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, + "automatic interface scanning " + "terminated: %s", + 
isc_result_totext(eresult)); + } + isc_nmhandle_detach(&mgr->route); + return; + } + + rtm = (struct MSGHDR *)region->base; #ifdef RTM_VERSION if (rtm->rtm_version != RTM_VERSION) { isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, @@ -137,8 +135,7 @@ route_event(isc_task_t *task, isc_event_t *event) { "rtm->rtm_version mismatch (%u != %u) " "recompile required", rtm->rtm_version, RTM_VERSION); - ns_interfacemgr_detach(&mgr); - isc_event_free(&event); + isc_nmhandle_detach(&mgr->route); return; } #endif /* ifdef RTM_VERSION */ @@ -147,7 +144,11 @@ route_event(isc_task_t *task, isc_event_t *event) { case RTM_NEWADDR: case RTM_DELADDR: if (mgr->route != NULL && mgr->sctx->interface_auto) { - ns_interfacemgr_scan(mgr, false); + isc_event_t *event = NULL; + event = isc_event_allocate(mgr->mctx, mgr, + NS_EVENT_IFSCAN, scan_event, + mgr, sizeof(*event)); + isc_task_send(mgr->excl, &event); } break; default: @@ -156,70 +157,67 @@ route_event(isc_task_t *task, isc_event_t *event) { LOCK(&mgr->lock); if (mgr->route != NULL) { - /* - * Look for next route event. 
- */ - r.base = mgr->buf; - r.length = sizeof(mgr->buf); - result = isc_socket_recv(mgr->route, &r, 1, mgr->task, - route_event, mgr); - if (result == ISC_R_SUCCESS) { - done = false; - } + isc_nm_read(handle, route_recv, mgr); + done = false; } UNLOCK(&mgr->lock); if (done) { - ns_interfacemgr_detach(&mgr); + isc_nmhandle_detach(&mgr->route); } - isc_event_free(&event); return; } -#endif /* ifdef USE_ROUTE_SOCKET */ + +static void +route_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { + ns_interfacemgr_t *mgr = (ns_interfacemgr_t *)arg; + + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_DEBUG(3), + "route_connected: %s", isc_result_totext(eresult)); + + if (eresult != ISC_R_SUCCESS) { + return; + } + + INSIST(mgr->route == NULL); + + isc_nmhandle_attach(handle, &mgr->route); + isc_nm_read(handle, route_recv, mgr); +} isc_result_t ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, isc_timermgr_t *timermgr, - isc_socketmgr_t *socketmgr, isc_nm_t *nm, - dns_dispatchmgr_t *dispatchmgr, isc_task_t *task, - dns_geoip_databases_t *geoip, int ncpus, - ns_interfacemgr_t **mgrp) { + isc_nm_t *nm, dns_dispatchmgr_t *dispatchmgr, + isc_task_t *task, dns_geoip_databases_t *geoip, + int ncpus, bool scan, ns_interfacemgr_t **mgrp) { isc_result_t result; - ns_interfacemgr_t *mgr; + ns_interfacemgr_t *mgr = NULL; -#ifndef USE_ROUTE_SOCKET UNUSED(task); -#endif /* ifndef USE_ROUTE_SOCKET */ REQUIRE(mctx != NULL); REQUIRE(mgrp != NULL); REQUIRE(*mgrp == NULL); mgr = isc_mem_get(mctx, sizeof(*mgr)); + *mgr = (ns_interfacemgr_t){ .taskmgr = taskmgr, + .timermgr = timermgr, + .nm = nm, + .dispatchmgr = dispatchmgr, + .generation = 1, + .ncpus = ncpus }; - mgr->mctx = NULL; isc_mem_attach(mctx, &mgr->mctx); - - mgr->sctx = NULL; ns_server_attach(sctx, &mgr->sctx); isc_mutex_init(&mgr->lock); - mgr->excl = NULL; result = isc_taskmgr_excltask(taskmgr, &mgr->excl); if (result != ISC_R_SUCCESS) { goto cleanup_lock; } - mgr->taskmgr = 
taskmgr; - mgr->timermgr = timermgr; - mgr->socketmgr = socketmgr; - mgr->nm = nm; - mgr->dispatchmgr = dispatchmgr; - mgr->generation = 1; - mgr->listenon4 = NULL; - mgr->listenon6 = NULL; - mgr->ncpus = ncpus; atomic_init(&mgr->shuttingdown, false); ISC_LIST_INIT(mgr->interfaces); @@ -244,28 +242,16 @@ ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, UNUSED(geoip); #endif /* if defined(HAVE_GEOIP2) */ -#ifdef USE_ROUTE_SOCKET - mgr->route = NULL; - result = isc_socket_create(mgr->socketmgr, ROUTE_SOCKET_PROTOCOL, - isc_sockettype_raw, &mgr->route); - switch (result) { - case ISC_R_NOPERM: - case ISC_R_SUCCESS: - case ISC_R_NOTIMPLEMENTED: - case ISC_R_FAMILYNOSUPPORT: - break; - default: - goto cleanup_aclenv; + if (scan) { + result = isc_nm_routeconnect(nm, route_connected, mgr, 0); + if (result != ISC_R_SUCCESS) { + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_INFO, + "unable to open route socket: %s", + isc_result_totext(result)); + } } - mgr->task = NULL; - if (mgr->route != NULL) { - isc_task_attach(task, &mgr->task); - } - isc_refcount_init(&mgr->references, (mgr->route != NULL) ? 
2 : 1); -#else /* ifdef USE_ROUTE_SOCKET */ isc_refcount_init(&mgr->references, 1); -#endif /* ifdef USE_ROUTE_SOCKET */ mgr->magic = IFMGR_MAGIC; *mgrp = mgr; @@ -278,25 +264,8 @@ ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, RUNTIME_CHECK(result == ISC_R_SUCCESS); } -#ifdef USE_ROUTE_SOCKET - if (mgr->route != NULL) { - isc_region_t r = { mgr->buf, sizeof(mgr->buf) }; - - result = isc_socket_recv(mgr->route, &r, 1, mgr->task, - route_event, mgr); - if (result != ISC_R_SUCCESS) { - isc_task_detach(&mgr->task); - isc_socket_detach(&mgr->route); - ns_interfacemgr_detach(&mgr); - } - } -#endif /* ifdef USE_ROUTE_SOCKET */ return (ISC_R_SUCCESS); -#ifdef USE_ROUTE_SOCKET -cleanup_aclenv: - dns_aclenv_detach(&mgr->aclenv); -#endif /* ifdef USE_ROUTE_SOCKET */ cleanup_listenon: ns_listenlist_detach(&mgr->listenon4); ns_listenlist_detach(&mgr->listenon6); @@ -314,14 +283,9 @@ ns_interfacemgr_destroy(ns_interfacemgr_t *mgr) { isc_refcount_destroy(&mgr->references); -#ifdef USE_ROUTE_SOCKET if (mgr->route != NULL) { - isc_socket_detach(&mgr->route); + isc_nmhandle_detach(&mgr->route); } - if (mgr->task != NULL) { - isc_task_detach(&mgr->task); - } -#endif /* ifdef USE_ROUTE_SOCKET */ dns_aclenv_detach(&mgr->aclenv); ns_listenlist_detach(&mgr->listenon4); ns_listenlist_detach(&mgr->listenon6); @@ -387,15 +351,13 @@ ns_interfacemgr_shutdown(ns_interfacemgr_t *mgr) { */ mgr->generation++; atomic_store(&mgr->shuttingdown, true); -#ifdef USE_ROUTE_SOCKET + LOCK(&mgr->lock); if (mgr->route != NULL) { - isc_socket_cancel(mgr->route, mgr->task, ISC_SOCKCANCEL_RECV); - isc_socket_detach(&mgr->route); - isc_task_detach(&mgr->task); + isc_nmhandle_detach(&mgr->route); } UNLOCK(&mgr->lock); -#endif /* ifdef USE_ROUTE_SOCKET */ + purge_old_interfaces(mgr); } diff --git a/lib/ns/tests/nstest.c b/lib/ns/tests/nstest.c index a1bbba73f4..15614e1059 100644 --- a/lib/ns/tests/nstest.c +++ b/lib/ns/tests/nstest.c @@ -30,7 +30,6 @@ #include #include #include -#include #include 
#include #include @@ -57,7 +56,6 @@ isc_nm_t *netmgr = NULL; isc_taskmgr_t *taskmgr = NULL; isc_task_t *maintask = NULL; isc_timermgr_t *timermgr = NULL; -isc_socketmgr_t *socketmgr = NULL; dns_zonemgr_t *zonemgr = NULL; dns_dispatchmgr_t *dispatchmgr = NULL; ns_clientmgr_t *clientmgr = NULL; @@ -202,8 +200,7 @@ cleanup_managers(void) { isc_managers_destroy(netmgr == NULL ? NULL : &netmgr, taskmgr == NULL ? NULL : &taskmgr, - timermgr == NULL ? NULL : &timermgr, - socketmgr == NULL ? NULL : &socketmgr); + timermgr == NULL ? NULL : &timermgr); if (app_running) { isc_app_finish(); @@ -226,8 +223,7 @@ create_managers(void) { isc_event_t *event = NULL; ncpus = isc_os_ncpus(); - isc_managers_create(mctx, ncpus, 0, 0, &netmgr, &taskmgr, &timermgr, - &socketmgr); + isc_managers_create(mctx, ncpus, 0, &netmgr, &taskmgr, &timermgr); CHECK(isc_task_create_bound(taskmgr, 0, &maintask, 0)); isc_taskmgr_setexcltask(taskmgr, maintask); CHECK(isc_task_onshutdown(maintask, shutdown_managers, NULL)); @@ -236,8 +232,8 @@ create_managers(void) { CHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); - CHECK(ns_interfacemgr_create(mctx, sctx, taskmgr, timermgr, socketmgr, - netmgr, dispatchmgr, maintask, NULL, ncpus, + CHECK(ns_interfacemgr_create(mctx, sctx, taskmgr, timermgr, netmgr, + dispatchmgr, maintask, NULL, ncpus, false, &interfacemgr)); CHECK(ns_listenlist_default(mctx, port, -1, true, &listenon)); @@ -248,12 +244,6 @@ create_managers(void) { scan_interfaces, NULL, sizeof(isc_event_t)); isc_task_send(maintask, &event); - /* - * There's no straightforward way to determine - * whether the interfaces have been scanned, - * we'll just sleep for a bit and hope. 
- */ - ns_test_nap(500000); clientmgr = ns_interfacemgr_getclientmgr(interfacemgr); atomic_store(&run_managers, true); diff --git a/lib/ns/tests/nstest.h b/lib/ns/tests/nstest.h index 8b3b2ad516..ac16abd166 100644 --- a/lib/ns/tests/nstest.h +++ b/lib/ns/tests/nstest.h @@ -54,7 +54,6 @@ extern isc_log_t *lctx; extern isc_taskmgr_t *taskmgr; extern isc_task_t *maintask; extern isc_timermgr_t *timermgr; -extern isc_socketmgr_t *socketmgr; extern dns_zonemgr_t *zonemgr; extern dns_dispatchmgr_t *dispatchmgr; extern ns_clientmgr_t *clientmgr; diff --git a/util/copyrights b/util/copyrights index 49911e7604..ea2e3051c4 100644 --- a/util/copyrights +++ b/util/copyrights @@ -1691,7 +1691,6 @@ ./lib/isc/include/isc/serial.h C 1999,2000,2001,2004,2005,2006,2007,2009,2016,2018,2019,2020,2021 ./lib/isc/include/isc/siphash.h C 2019,2020,2021 ./lib/isc/include/isc/sockaddr.h C 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2009,2012,2015,2016,2018,2019,2020,2021 -./lib/isc/include/isc/socket.h C 1998,1999,2000,2001,2002,2004,2005,2006,2007,2008,2009,2011,2012,2013,2014,2016,2018,2019,2020,2021 ./lib/isc/include/isc/stat.h C 2004,2007,2014,2016,2018,2019,2020,2021 ./lib/isc/include/isc/stats.h C 2009,2012,2016,2018,2019,2020,2021 ./lib/isc/include/isc/stdatomic.h C 2019,2020,2021 @@ -1761,8 +1760,6 @@ ./lib/isc/serial.c C 1999,2000,2001,2004,2005,2007,2016,2018,2019,2020,2021 ./lib/isc/siphash.c C 2019,2020,2021 ./lib/isc/sockaddr.c C 1999,2000,2001,2002,2003,2004,2005,2006,2007,2010,2011,2012,2014,2015,2016,2017,2018,2019,2020,2021 -./lib/isc/socket.c C 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 -./lib/isc/socket_p.h C 2021 ./lib/isc/stats.c C 2009,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 ./lib/isc/stdio.c C 2000,2001,2004,2007,2011,2012,2013,2014,2016,2018,2019,2020,2021 ./lib/isc/stdtime.c C 1999,2000,2001,2004,2005,2007,2016,2018,2019,2020,2021 @@ -1800,7 +1797,6 @@ 
./lib/isc/tests/safe_test.c C 2013,2015,2016,2017,2018,2019,2020,2021 ./lib/isc/tests/siphash_test.c C 2019,2020,2021 ./lib/isc/tests/sockaddr_test.c C 2012,2015,2016,2017,2018,2019,2020,2021 -./lib/isc/tests/socket_test.c C 2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 ./lib/isc/tests/stats_test.c C 2021 ./lib/isc/tests/symtab_test.c C 2011,2012,2013,2016,2018,2019,2020,2021 ./lib/isc/tests/task_test.c C 2011,2012,2016,2017,2018,2019,2020,2021