diff --git a/.gitignore b/.gitignore index 55bc4ffdd4..1997dd73a2 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ *.rej *.so *_test +*.ipch # vscode/intellisense precompiled header *~ .ccache/ .cproject diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 89cd470ac6..86f03503fd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -675,6 +675,7 @@ unit:gcc:bionic:amd64: asan:sid:amd64: variables: CC: gcc + ASAN_OPTIONS: "detect_leaks=0" CFLAGS: "-Wall -Wextra -O2 -g -fsanitize=address,undefined -DISC_MEM_USE_INTERNAL_MALLOC=0" LDFLAGS: "-fsanitize=address,undefined" EXTRA_CONFIGURE: "--with-libidn2" @@ -901,6 +902,7 @@ msvc:windows:amd64: "with-vcredist=C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Redist/MSVC/14.16.27012/vcredist_x64.exe" "with-openssl=C:/OpenSSL" "with-libxml2=C:/libxml2" + "with-libuv=C:/libuv" "without-python" "with-system-tests" x64' diff --git a/CHANGES b/CHANGES index 128da21376..49802fcf80 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,14 @@ +5317. [func] A new asynchronous network communications system + based on libuv is now used for listening for + incoming requests and responding to them. (The + old isc_socket API remains in use for sending + iterative queries and processing responses; this + will be changed too in a later release.) + + This change will make it easier to improve + performance and implement new protocol layers + (e.g., DNS over TLS) in the future. [GL #29] + 5316. [func] A new "dnssec-policy" option has been added to named.conf to implement a key and signing policy (KASP) for zones. When this option is in use, diff --git a/README.md b/README.md index 9bd13231c7..8d371e40c3 100644 --- a/README.md +++ b/README.md @@ -129,8 +129,9 @@ include: * New "dnssec-policy" statement to configure a key and signing policy for zones, enabling automatic key regeneration and rollover. +* A new network manager based on libuv. 
* Support for the new GeoIP2 geolocation API -* Improved DNSSEC key configuration using `dnssec-keys` +* Improved DNSSEC trust anchor configuration using `dnssec-keys` * YAML output for `dig`, `mdig`, and `delv`. ### Building BIND diff --git a/bin/dig/dighost.c b/bin/dig/dighost.c index e2018a7af7..bc712a1ad0 100644 --- a/bin/dig/dighost.c +++ b/bin/dig/dighost.c @@ -1379,7 +1379,7 @@ setup_libs(void) { isc_log_setdebuglevel(lctx, 0); - result = isc_taskmgr_create(mctx, 1, 0, &taskmgr); + result = isc_taskmgr_create(mctx, 1, 0, NULL, &taskmgr); check_result(result, "isc_taskmgr_create"); result = isc_task_create(taskmgr, 0, &global_task); diff --git a/bin/dnssec/dnssec-signzone.c b/bin/dnssec/dnssec-signzone.c index 5703dde634..ff87a48469 100644 --- a/bin/dnssec/dnssec-signzone.c +++ b/bin/dnssec/dnssec-signzone.c @@ -3797,7 +3797,7 @@ main(int argc, char *argv[]) { print_time(outfp); print_version(outfp); - result = isc_taskmgr_create(mctx, ntasks, 0, &taskmgr); + result = isc_taskmgr_create(mctx, ntasks, 0, NULL, &taskmgr); if (result != ISC_R_SUCCESS) fatal("failed to create task manager: %s", isc_result_totext(result)); diff --git a/bin/named/include/named/globals.h b/bin/named/include/named/globals.h index eac0fe18be..7d5877c8ac 100644 --- a/bin/named/include/named/globals.h +++ b/bin/named/include/named/globals.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,7 @@ EXTERN bool named_g_run_done INIT(false); */ EXTERN isc_timermgr_t * named_g_timermgr INIT(NULL); EXTERN isc_socketmgr_t * named_g_socketmgr INIT(NULL); +EXTERN isc_nm_t * named_g_nm INIT(NULL); EXTERN cfg_parser_t * named_g_parser INIT(NULL); EXTERN cfg_parser_t * named_g_addparser INIT(NULL); EXTERN const char * named_g_version INIT(VERSION); diff --git a/bin/named/main.c b/bin/named/main.c index 909b3ad52d..d707916a7c 100644 --- a/bin/named/main.c +++ b/bin/named/main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ 
-124,7 +125,6 @@ static int maxudp = 0; /* * -T options: */ -static bool clienttest = false; static bool dropedns = false; static bool ednsformerr = false; static bool ednsnotimp = false; @@ -622,17 +622,12 @@ parse_T_opt(char *option) { /* * force the server to behave (or misbehave) in * specified ways for testing purposes. - * - * clienttest: make clients single shot with their - * own memory context. * delay=xxxx: delay client responses by xxxx ms to * simulate remote servers. * dscp=x: check that dscp values are as * expected and assert otherwise. */ - if (!strcmp(option, "clienttest")) { - clienttest = true; - } else if (!strncmp(option, "delay=", 6)) { + if (!strncmp(option, "delay=", 6)) { delay = atoi(option + 6); } else if (!strcmp(option, "dropedns")) { dropedns = true; @@ -897,8 +892,15 @@ create_managers(void) { "using %u UDP listener%s per interface", named_g_udpdisp, named_g_udpdisp == 1 ? "" : "s"); + named_g_nm = isc_nm_start(named_g_mctx, named_g_cpus); + if (named_g_nm == NULL) { + UNEXPECTED_ERROR(__FILE__, __LINE__, + "isc_nm_start() failed"); + return (ISC_R_UNEXPECTED); + } + result = isc_taskmgr_create(named_g_mctx, named_g_cpus, 0, - &named_g_taskmgr); + named_g_nm, &named_g_taskmgr); if (result != ISC_R_SUCCESS) { UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_taskmgr_create() failed: %s", @@ -923,6 +925,7 @@ create_managers(void) { return (ISC_R_UNEXPECTED); } isc_socketmgr_maxudp(named_g_socketmgr, maxudp); + isc_nm_maxudp(named_g_nm, maxudp); result = isc_socketmgr_getmaxsockets(named_g_socketmgr, &socks); if (result == ISC_R_SUCCESS) { isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, @@ -941,6 +944,7 @@ destroy_managers(void) { isc_taskmgr_destroy(&named_g_taskmgr); isc_timermgr_destroy(&named_g_timermgr); isc_socketmgr_destroy(&named_g_socketmgr); + isc_nm_destroy(&named_g_nm); } static void @@ -1254,8 +1258,6 @@ setup(void) { /* * Modify server context according to command line options */ - if (clienttest) - ns_server_setoption(sctx, 
NS_SERVER_CLIENTTEST, true); if (disable4) ns_server_setoption(sctx, NS_SERVER_DISABLE4, true); if (disable6) diff --git a/bin/named/server.c b/bin/named/server.c index ca216599ff..bf4f4a0ebc 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -9462,6 +9462,7 @@ run_server(isc_task_t *task, isc_event_t *event) { CHECKFATAL(ns_interfacemgr_create(named_g_mctx, server->sctx, named_g_taskmgr, named_g_timermgr, named_g_socketmgr, + named_g_nm, named_g_dispatchmgr, server->task, named_g_udpdisp, geoip, &server->interfacemgr), @@ -9525,6 +9526,12 @@ shutdown_server(isc_task_t *task, isc_event_t *event) { UNUSED(task); INSIST(task == server->task); + /* + * We need to shutdown the interface before going + * exclusive (which would pause the netmgr). + */ + ns_interfacemgr_shutdown(server->interfacemgr); + result = isc_task_beginexclusive(server->task); RUNTIME_CHECK(result == ISC_R_SUCCESS); @@ -9582,7 +9589,6 @@ shutdown_server(isc_task_t *task, isc_event_t *event) { isc_timer_detach(&server->pps_timer); isc_timer_detach(&server->tat_timer); - ns_interfacemgr_shutdown(server->interfacemgr); ns_interfacemgr_detach(&server->interfacemgr); dns_dispatchmgr_destroy(&named_g_dispatchmgr); diff --git a/bin/nsupdate/nsupdate.c b/bin/nsupdate/nsupdate.c index b5cb8f3976..a272b14a0f 100644 --- a/bin/nsupdate/nsupdate.c +++ b/bin/nsupdate/nsupdate.c @@ -906,7 +906,7 @@ setup_system(void) { result = isc_timermgr_create(gmctx, &timermgr); check_result(result, "dns_timermgr_create"); - result = isc_taskmgr_create(gmctx, 1, 0, &taskmgr); + result = isc_taskmgr_create(gmctx, 1, 0, NULL, &taskmgr); check_result(result, "isc_taskmgr_create"); result = isc_task_create(taskmgr, 0, &global_task); diff --git a/bin/rndc/rndc.c b/bin/rndc/rndc.c index 149f58ef97..d9f32a4507 100644 --- a/bin/rndc/rndc.c +++ b/bin/rndc/rndc.c @@ -936,11 +936,14 @@ main(int argc, char **argv) { serial = isc_random32(); isc_mem_create(&rndc_mctx); - DO("create socket manager", isc_socketmgr_create(rndc_mctx, 
&socketmgr)); - DO("create task manager", isc_taskmgr_create(rndc_mctx, 1, 0, &taskmgr)); + DO("create socket manager", isc_socketmgr_create(rndc_mctx, + &socketmgr)); + DO("create task manager", isc_taskmgr_create(rndc_mctx, 1, 0, + NULL, &taskmgr)); DO("create task", isc_task_create(taskmgr, 0, &task)); - DO("create logging context", isc_log_create(rndc_mctx, &log, &logconfig)); + DO("create logging context", isc_log_create(rndc_mctx, &log, + &logconfig)); isc_log_setcontext(log); DO("setting log tag", isc_log_settag(logconfig, progname)); logdest.file.stream = stderr; diff --git a/bin/tests/optional/adb_test.c b/bin/tests/optional/adb_test.c index 1dcdeff0fd..73d9cd587e 100644 --- a/bin/tests/optional/adb_test.c +++ b/bin/tests/optional/adb_test.c @@ -144,7 +144,7 @@ create_managers(void) { isc_result_t result; taskmgr = NULL; - result = isc_taskmgr_create(mctx, 5, 0, &taskmgr); + result = isc_taskmgr_create(mctx, 5, 0, NULL, &taskmgr); check_result(result, "isc_taskmgr_create"); timermgr = NULL; diff --git a/bin/tests/optional/byaddr_test.c b/bin/tests/optional/byaddr_test.c index b72f508708..036ee961bd 100644 --- a/bin/tests/optional/byaddr_test.c +++ b/bin/tests/optional/byaddr_test.c @@ -112,7 +112,7 @@ main(int argc, char *argv[]) { } taskmgr = NULL; - RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, &taskmgr) + RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, NULL, &taskmgr) == ISC_R_SUCCESS); task = NULL; RUNTIME_CHECK(isc_task_create(taskmgr, 0, &task) diff --git a/bin/tests/optional/byname_test.c b/bin/tests/optional/byname_test.c index ccff87a5fe..fe0d2d7b5a 100644 --- a/bin/tests/optional/byname_test.c +++ b/bin/tests/optional/byname_test.c @@ -226,7 +226,7 @@ main(int argc, char *argv[]) { } taskmgr = NULL; - RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, &taskmgr) == + RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, NULL, &taskmgr) == ISC_R_SUCCESS); task = NULL; RUNTIME_CHECK(isc_task_create(taskmgr, 0, &task) == diff --git 
a/bin/tests/optional/gsstest.c b/bin/tests/optional/gsstest.c index 49f0a507d3..f708316a00 100644 --- a/bin/tests/optional/gsstest.c +++ b/bin/tests/optional/gsstest.c @@ -470,7 +470,7 @@ main(int argc, char *argv[]) { RUNCHECK(dst_lib_init(mctx, NULL)); taskmgr = NULL; - RUNCHECK(isc_taskmgr_create(mctx, 1, 0, &taskmgr)); + RUNCHECK(isc_taskmgr_create(mctx, 1, 0, NULL, &taskmgr)); task = NULL; RUNCHECK(isc_task_create(taskmgr, 0, &task)); timermgr = NULL; diff --git a/bin/tests/optional/ratelimiter_test.c b/bin/tests/optional/ratelimiter_test.c index 5bb0f081a6..59c1698295 100644 --- a/bin/tests/optional/ratelimiter_test.c +++ b/bin/tests/optional/ratelimiter_test.c @@ -102,7 +102,7 @@ main(int argc, char *argv[]) { isc_interval_set(&linterval, 1, 0); isc_mem_create(&mctx); - RUNTIME_CHECK(isc_taskmgr_create(mctx, 3, 0, &taskmgr) == + RUNTIME_CHECK(isc_taskmgr_create(mctx, 3, 0, NULL, &taskmgr) == ISC_R_SUCCESS); RUNTIME_CHECK(isc_timermgr_create(mctx, &timermgr) == ISC_R_SUCCESS); diff --git a/bin/tests/optional/shutdown_test.c b/bin/tests/optional/shutdown_test.c index 6c533dc992..63331c4360 100644 --- a/bin/tests/optional/shutdown_test.c +++ b/bin/tests/optional/shutdown_test.c @@ -181,7 +181,7 @@ main(int argc, char *argv[]) { isc_mem_create(&mctx); mctx2 = NULL; isc_mem_create(&mctx2); - RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, &task_manager) == + RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, NULL, &task_manager) == ISC_R_SUCCESS); RUNTIME_CHECK(isc_timermgr_create(mctx, &timer_manager) == ISC_R_SUCCESS); diff --git a/bin/tests/optional/sig0_test.c b/bin/tests/optional/sig0_test.c index 175edd80cd..72c5c23db1 100644 --- a/bin/tests/optional/sig0_test.c +++ b/bin/tests/optional/sig0_test.c @@ -226,7 +226,7 @@ main(int argc, char *argv[]) { dst_result_register(); taskmgr = NULL; - RUNTIME_CHECK(isc_taskmgr_create(mctx, 2, 0, &taskmgr) == + RUNTIME_CHECK(isc_taskmgr_create(mctx, 2, 0, NULL, &taskmgr) == ISC_R_SUCCESS); task1 = NULL; 
RUNTIME_CHECK(isc_task_create(taskmgr, 0, &task1) == ISC_R_SUCCESS); diff --git a/bin/tests/optional/sock_test.c b/bin/tests/optional/sock_test.c index 15ea571c2c..f242638bad 100644 --- a/bin/tests/optional/sock_test.c +++ b/bin/tests/optional/sock_test.c @@ -292,7 +292,7 @@ main(int argc, char *argv[]) { * The task manager is independent (other than memory context) */ manager = NULL; - RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, &manager) == + RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, NULL, &manager) == ISC_R_SUCCESS); /* diff --git a/bin/tests/optional/task_test.c b/bin/tests/optional/task_test.c index 9a680c0cc7..0e848aeda7 100644 --- a/bin/tests/optional/task_test.c +++ b/bin/tests/optional/task_test.c @@ -79,7 +79,7 @@ main(int argc, char *argv[]) { isc_mem_create(&mctx); - RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, &manager) == + RUNTIME_CHECK(isc_taskmgr_create(mctx, workers, 0, NULL, &manager) == ISC_R_SUCCESS); RUNTIME_CHECK(isc_task_create(manager, 0, &t1) == ISC_R_SUCCESS); diff --git a/bin/tests/optional/timer_test.c b/bin/tests/optional/timer_test.c index 6122aca33e..61f7b64a8f 100644 --- a/bin/tests/optional/timer_test.c +++ b/bin/tests/optional/timer_test.c @@ -108,7 +108,7 @@ main(int argc, char *argv[]) { printf("%u workers\n", workers); isc_mem_create(&mctx1); - RUNTIME_CHECK(isc_taskmgr_create(mctx1, workers, 0, &manager) == + RUNTIME_CHECK(isc_taskmgr_create(mctx1, workers, 0, NULL, &manager) == ISC_R_SUCCESS); RUNTIME_CHECK(isc_timermgr_create(mctx1, &timgr) == ISC_R_SUCCESS); diff --git a/bin/tests/optional/zone_test.c b/bin/tests/optional/zone_test.c index 914a4341f2..641533c8f5 100644 --- a/bin/tests/optional/zone_test.c +++ b/bin/tests/optional/zone_test.c @@ -280,7 +280,7 @@ main(int argc, char **argv) { RUNTIME_CHECK(isc_app_start() == ISC_R_SUCCESS); isc_mem_create(&mctx); - RUNTIME_CHECK(isc_taskmgr_create(mctx, 2, 0, &taskmgr) == + RUNTIME_CHECK(isc_taskmgr_create(mctx, 2, 0, NULL, &taskmgr) == ISC_R_SUCCESS); 
RUNTIME_CHECK(isc_timermgr_create(mctx, &timermgr) == ISC_R_SUCCESS); RUNTIME_CHECK(isc_socketmgr_create(mctx, &socketmgr) == ISC_R_SUCCESS); diff --git a/bin/tests/system/README b/bin/tests/system/README index f527e790d4..13286c054e 100644 --- a/bin/tests/system/README +++ b/bin/tests/system/README @@ -584,10 +584,6 @@ By default, start.pl starts a "named" server with the following options: preventing multiple instances of this named running in this directory (which could possibly interfere with the test). -In addition, start.pl also sets the following undocumented flag: - - -T clienttest Makes clients single-shot with their own memory context. - All output is sent to a file called "named.run" in the nameserver directory. The options used to start named can be altered. There are three ways of doing @@ -608,9 +604,9 @@ the named command-line arguments. The rest of the file is ignored. 3. Tweaking the default command line arguments with "-T" options. This flag is used to alter the behavior of BIND for testing and is not documented in the -ARM. The "clienttest" option has already been mentioned, but the presence of -certain files in the "nsN" directory adds flags to the default command line -(the content of the files is irrelevant - it is only the presence that counts): +ARM. The presence of certain files in the "nsN" directory adds flags to +the default command line (the content of the files is irrelevant - it +is only the presence that counts): named.noaa Appends "-T noaa" to the command line, which causes "named" to never set the AA bit in an answer. @@ -635,7 +631,6 @@ certain files in the "nsN" directory adds flags to the default command line the additional section if the response is triggered by RPZ rewriting). 
- Starting Other Nameservers --- In contrast to "named", nameservers written in Perl or Python (whose script diff --git a/bin/tests/system/additional/ns1/named.args b/bin/tests/system/additional/ns1/named.args index 39b8c1ae55..0db1ead834 100644 --- a/bin/tests/system/additional/ns1/named.args +++ b/bin/tests/system/additional/ns1/named.args @@ -1,2 +1,2 @@ # this server runs named with only one worker thread --m record,size,mctx -c named.conf -d 99 -D additional-ns1 -X named.lock -g -T clienttest -n 1 +-m record,size,mctx -c named.conf -d 99 -D additional-ns1 -X named.lock -g -n 1 diff --git a/bin/tests/system/addzone/tests.sh b/bin/tests/system/addzone/tests.sh index 4e5301f6ae..a4c1ca425d 100755 --- a/bin/tests/system/addzone/tests.sh +++ b/bin/tests/system/addzone/tests.sh @@ -696,11 +696,17 @@ $RNDCCMD 10.53.0.3 addzone "test4.baz" '{ type master; file "e.db"; };' > /dev/n $RNDCCMD 10.53.0.3 addzone "test5.baz" '{ type master; file "e.db"; };' > /dev/null 2>&1 || ret=1 $PERL $SYSTEMTESTTOP/stop.pl addzone ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} addzone ns3 || ret=1 -$DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || ret=1 -grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 -n=`expr $n + 1` +for try in 0 1 2 3 4 5 6 7 8 9; do + iret=0 + $DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || iret=1 + grep "status: NOERROR" dig.out.test$n > /dev/null || iret=1 + [ "$iret" -eq 0 ] && break + sleep 1 +done +[ "$iret" -ne 0 ] && ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` +n=`expr $n + 1` echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/bin/tests/system/allow-query/ns3/named.args b/bin/tests/system/allow-query/ns3/named.args index 0f50735bfd..5dd675d0fd 100644 --- a/bin/tests/system/allow-query/ns3/named.args +++ b/bin/tests/system/allow-query/ns3/named.args @@ -1,2 +1,2 @@ # this server only has 127.0.0.1 in its localhost/localnets ACLs --m 
record,size,mctx -c named.conf -d 99 -D allow-query-ns3 -X named.lock -g -T clienttest -T fixedlocal +-m record,size,mctx -c named.conf -d 99 -D allow-query-ns3 -X named.lock -g -T fixedlocal diff --git a/bin/tests/system/delzone/ns2/named.args b/bin/tests/system/delzone/ns2/named.args index 12588aa196..b3028b1d29 100644 --- a/bin/tests/system/delzone/ns2/named.args +++ b/bin/tests/system/delzone/ns2/named.args @@ -1 +1 @@ --D delzone-ns2 -X named.lock -m record,size,mctx -T clienttest -c named.conf -g -U 4 +-D delzone-ns2 -X named.lock -m record,size,mctx -c named.conf -g -U 4 diff --git a/bin/tests/system/dlzexternal/driver.c b/bin/tests/system/dlzexternal/driver.c index b969b9e053..4774118a0b 100644 --- a/bin/tests/system/dlzexternal/driver.c +++ b/bin/tests/system/dlzexternal/driver.c @@ -124,13 +124,13 @@ add_name(struct dlz_example_data *state, struct record *list, strlen(data) >= sizeof(list[i].data)) return (ISC_R_NOSPACE); - strncpy(list[i].name, name, sizeof(list[i].name)); + strncpy(list[i].name, name, sizeof(list[i].name) - 1); list[i].name[sizeof(list[i].name) - 1] = '\0'; - strncpy(list[i].type, type, sizeof(list[i].type)); + strncpy(list[i].type, type, sizeof(list[i].type) - 1); list[i].type[sizeof(list[i].type) - 1] = '\0'; - strncpy(list[i].data, data, sizeof(list[i].data)); + strncpy(list[i].data, data, sizeof(list[i].data) - 1); list[i].data[sizeof(list[i].data) - 1] = '\0'; list[i].ttl = ttl; diff --git a/bin/tests/system/dnssec/ns6/named.args b/bin/tests/system/dnssec/ns6/named.args index f500166416..21242f17ff 100644 --- a/bin/tests/system/dnssec/ns6/named.args +++ b/bin/tests/system/dnssec/ns6/named.args @@ -1 +1 @@ --m record,size,mctx -c named.conf -d 99 -D dnssec-ns6 -X named.lock -g -T nonearest -T clienttest -T tat=1 +-m record,size,mctx -c named.conf -d 99 -D dnssec-ns6 -X named.lock -g -T nonearest -T tat=1 diff --git a/bin/tests/system/dscp/ns1/named.args b/bin/tests/system/dscp/ns1/named.args index 248cee7d55..4986abce05 100644 --- 
a/bin/tests/system/dscp/ns1/named.args +++ b/bin/tests/system/dscp/ns1/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D dscp-ns1 -X named.lock -g -U 4 -T dscp=46 +-m record,size,mctx -c named.conf -d 99 -D dscp-ns1 -X named.lock -g -U 4 -T dscp=46 diff --git a/bin/tests/system/dscp/ns2/named.args b/bin/tests/system/dscp/ns2/named.args index 4a205add68..91635e8c04 100644 --- a/bin/tests/system/dscp/ns2/named.args +++ b/bin/tests/system/dscp/ns2/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D dscp-ns2 -X named.lock -g -U 4 -T dscp=46 +-m record,size,mctx -c named.conf -d 99 -D dscp-ns2 -X named.lock -g -U 4 -T dscp=46 diff --git a/bin/tests/system/dscp/ns3/named.args b/bin/tests/system/dscp/ns3/named.args index cf4a1821d6..ec9b5934da 100644 --- a/bin/tests/system/dscp/ns3/named.args +++ b/bin/tests/system/dscp/ns3/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D dscp-ns3 -X named.lock -g -U 4 -T dscp=46 +-m record,size,mctx -c named.conf -d 99 -D dscp-ns3 -X named.lock -g -U 4 -T dscp=46 diff --git a/bin/tests/system/dscp/ns4/named.args b/bin/tests/system/dscp/ns4/named.args index 57678fe2fb..6da9eff607 100644 --- a/bin/tests/system/dscp/ns4/named.args +++ b/bin/tests/system/dscp/ns4/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D dscp-ns4 -X named.lock -g -U 4 -T dscp=46 +-m record,size,mctx -c named.conf -d 99 -D dscp-ns4 -X named.lock -g -U 4 -T dscp=46 diff --git a/bin/tests/system/dscp/ns5/named.args b/bin/tests/system/dscp/ns5/named.args index 8382488588..dc556e7e86 100644 --- a/bin/tests/system/dscp/ns5/named.args +++ b/bin/tests/system/dscp/ns5/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D dscp-ns5 -X named.lock -g -U 4 -T dscp=46 +-m record,size,mctx -c named.conf -d 99 -D dscp-ns5 -X named.lock -g -U 4 -T dscp=46 diff --git a/bin/tests/system/dscp/ns6/named.args 
b/bin/tests/system/dscp/ns6/named.args index 482dd40870..c7389076ba 100644 --- a/bin/tests/system/dscp/ns6/named.args +++ b/bin/tests/system/dscp/ns6/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D dscp-ns6 -X named.lock -g -U 4 -T dscp=46 +-m record,size,mctx -c named.conf -d 99 -D dscp-ns6 -X named.lock -g -U 4 -T dscp=46 diff --git a/bin/tests/system/dscp/ns7/named.args b/bin/tests/system/dscp/ns7/named.args index 0528448c11..39ccaa4bbf 100644 --- a/bin/tests/system/dscp/ns7/named.args +++ b/bin/tests/system/dscp/ns7/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D dscp-ns7 -X named.lock -g -U 4 -T dscp=46 +-m record,size,mctx -c named.conf -d 99 -D dscp-ns7 -X named.lock -g -U 4 -T dscp=46 diff --git a/bin/tests/system/dupsigs/ns1/named.args b/bin/tests/system/dupsigs/ns1/named.args index c7cab8aa5b..2eed0529a7 100644 --- a/bin/tests/system/dupsigs/ns1/named.args +++ b/bin/tests/system/dupsigs/ns1/named.args @@ -1 +1 @@ --D dupsigs-ns1 -X named.lock -m record,size,mctx -T clienttest -c named.conf -d 99 -g -U 4 -T sigvalinsecs +-D dupsigs-ns1 -X named.lock -m record,size,mctx -c named.conf -d 99 -g -U 4 -T sigvalinsecs diff --git a/bin/tests/system/ecdsa/tests.sh b/bin/tests/system/ecdsa/tests.sh index c4ceefc346..7cddfd6ce5 100644 --- a/bin/tests/system/ecdsa/tests.sh +++ b/bin/tests/system/ecdsa/tests.sh @@ -20,7 +20,6 @@ rm -f dig.out.* DIGOPTS="+tcp +noau +noadd +nosea +nostat +nocmd +dnssec -p 5300" # Check the example. domain - echo "I:checking that positive validation works ($n)" ret=0 $DIG $DIGOPTS . 
@10.53.0.1 soa > dig.out.ns1.test$n || ret=1 diff --git a/bin/tests/system/fetchlimit/ns3/named.args b/bin/tests/system/fetchlimit/ns3/named.args deleted file mode 100644 index 1d7ee742c5..0000000000 --- a/bin/tests/system/fetchlimit/ns3/named.args +++ /dev/null @@ -1,2 +0,0 @@ -# Don't specify '-T clienttest' as it consumes lots of memory with this test --D fetchlimit-ns3 -X named.lock -m record,size,mctx -c named.conf -d 99 -g -U 4 diff --git a/bin/tests/system/forward/tests.sh b/bin/tests/system/forward/tests.sh index 36fd8a0040..1c3096cb79 100644 --- a/bin/tests/system/forward/tests.sh +++ b/bin/tests/system/forward/tests.sh @@ -98,10 +98,16 @@ status=`expr $status + $ret` echo_i "checking that forward only zone overrides empty zone" ret=0 -$DIG $DIGOPTS 1.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 -grep "status: NOERROR" dig.out.f2 > /dev/null || ret=1 -$DIG $DIGOPTS 2.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 -grep "status: NXDOMAIN" dig.out.f2 > /dev/null || ret=1 +# retry loop in case the server restart above causes transient failure; ret is reset on each attempt so an earlier failure does not stick +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS 1.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 + grep "status: NOERROR" dig.out.f2 > /dev/null || ret=1 + $DIG $DIGOPTS 2.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 + grep "status: NXDOMAIN" dig.out.f2 > /dev/null || ret=1 + [ "$ret" -eq 0 ] && break + sleep 1 +done if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/bin/tests/system/legacy/clean.sh b/bin/tests/system/legacy/clean.sh index ad7ef8540d..f883185746 100644 --- a/bin/tests/system/legacy/clean.sh +++ b/bin/tests/system/legacy/clean.sh @@ -14,6 +14,7 @@ rm -f ns*/named.run rm -f ns*/named.lock # build.sh +rm -f ns1/named_dump.db* rm -f ns6/K* rm -f ns6/dsset-* rm -f ns6/edns512.db diff --git a/bin/tests/system/legacy/ns4/named.args b/bin/tests/system/legacy/ns4/named.args index 1f11800927..24af8c1127 100644 --- a/bin/tests/system/legacy/ns4/named.args +++ 
b/bin/tests/system/legacy/ns4/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D legacy-ns4 -X named.lock -g -U 4 -T noedns +-m record,size,mctx -c named.conf -d 99 -D legacy-ns4 -X named.lock -g -U 4 -T noedns diff --git a/bin/tests/system/legacy/ns5/named.args b/bin/tests/system/legacy/ns5/named.args index 54aa083f26..515e77d0d9 100644 --- a/bin/tests/system/legacy/ns5/named.args +++ b/bin/tests/system/legacy/ns5/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D legacy-ns5 -X named.lock -g -U 4 -T noedns +-m record,size,mctx -c named.conf -d 99 -D legacy-ns5 -X named.lock -g -U 4 -T noedns diff --git a/bin/tests/system/legacy/ns6/named.args b/bin/tests/system/legacy/ns6/named.args index b9a278ec57..cdc570f7c8 100644 --- a/bin/tests/system/legacy/ns6/named.args +++ b/bin/tests/system/legacy/ns6/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D legacy-ns6 -X named.lock -g -U 4 -T maxudp512 +-m record,size,mctx -c named.conf -d 99 -D legacy-ns6 -X named.lock -g -U 4 -T maxudp512 diff --git a/bin/tests/system/legacy/ns7/named.args b/bin/tests/system/legacy/ns7/named.args index fbe0ebfa1d..2a1a61217b 100644 --- a/bin/tests/system/legacy/ns7/named.args +++ b/bin/tests/system/legacy/ns7/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D legacy-ns7 -X named.lock -g -U 4 -T maxudp512 +-m record,size,mctx -c named.conf -d 99 -D legacy-ns7 -X named.lock -g -U 4 -T maxudp512 diff --git a/bin/tests/system/legacy/tests.sh b/bin/tests/system/legacy/tests.sh index ed784d9615..8cbbeef53b 100755 --- a/bin/tests/system/legacy/tests.sh +++ b/bin/tests/system/legacy/tests.sh @@ -259,8 +259,13 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} legacy ns1 n=`expr $n + 1` echo_i "checking recursive lookup to edns 512 + no tcp + trust anchor fails ($n)" -ret=0 -resolution_fails edns512-notcp. 
|| ret=1 +# retry loop in case the server restart above causes transient failure +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + resolution_fails edns512-notcp. || ret=1 + [ "$ret" -eq 0 ] && break + sleep 1 +done if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/bin/tests/system/logfileconfig/tests.sh b/bin/tests/system/logfileconfig/tests.sh index 1a43b27a3c..67d06d8ad0 100644 --- a/bin/tests/system/logfileconfig/tests.sh +++ b/bin/tests/system/logfileconfig/tests.sh @@ -36,7 +36,7 @@ DLFILE="named_deflog" PIDFILE="${THISDIR}/${CONFDIR}/named.pid" myRNDC="$RNDC -c ${THISDIR}/${CONFDIR}/rndc.conf" -myNAMED="$NAMED -c ${THISDIR}/${CONFDIR}/named.conf -m record,size,mctx -T clienttest -T nosyslog -d 99 -D logfileconfig-ns1 -X named.lock -U 4" +myNAMED="$NAMED -c ${THISDIR}/${CONFDIR}/named.conf -m record,size,mctx -T nosyslog -d 99 -D logfileconfig-ns1 -X named.lock -U 4" # Test given condition. If true, test again after a second. Used for testing # filesystem-dependent conditions in order to prevent false negatives caused by diff --git a/bin/tests/system/mirror/ns3/named.args b/bin/tests/system/mirror/ns3/named.args index 5330759bd4..be1cb49ce3 100644 --- a/bin/tests/system/mirror/ns3/named.args +++ b/bin/tests/system/mirror/ns3/named.args @@ -1 +1 @@ --D mirror-ns3 -X named.lock -m record,size,mctx -T clienttest -c named.conf -d 99 -g -U 4 -T tat=3 +-D mirror-ns3 -X named.lock -m record,size,mctx -c named.conf -d 99 -g -U 4 -T tat=3 diff --git a/bin/tests/system/mkeys/ns2/named.args b/bin/tests/system/mkeys/ns2/named.args index 614243233d..9f4ad4e46b 100644 --- a/bin/tests/system/mkeys/ns2/named.args +++ b/bin/tests/system/mkeys/ns2/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D mkeys-ns2 -X named.lock -g -T mkeytimers=5/10/20 -T tat=1 +-m record,size,mctx -c named.conf -d 99 -D mkeys-ns2 -X named.lock -g -T mkeytimers=5/10/20 -T tat=1 diff --git a/bin/tests/system/mkeys/ns3/named.args 
b/bin/tests/system/mkeys/ns3/named.args index b8fb008562..376aa253cf 100644 --- a/bin/tests/system/mkeys/ns3/named.args +++ b/bin/tests/system/mkeys/ns3/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -D mkeys-ns3 -X named.lock -g -T mkeytimers=5/10/20 +-m record,size,mctx -c named.conf -d 99 -D mkeys-ns3 -X named.lock -g -T mkeytimers=5/10/20 diff --git a/bin/tests/system/mkeys/ns5/named1.args b/bin/tests/system/mkeys/ns5/named1.args index efb102a4ba..2e6aadc2c1 100644 --- a/bin/tests/system/mkeys/ns5/named1.args +++ b/bin/tests/system/mkeys/ns5/named1.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -X named.lock -g +-m record,size,mctx -c named.conf -d 99 -X named.lock -g diff --git a/bin/tests/system/mkeys/ns5/named2.args b/bin/tests/system/mkeys/ns5/named2.args index d222b7faea..3eaf260cff 100644 --- a/bin/tests/system/mkeys/ns5/named2.args +++ b/bin/tests/system/mkeys/ns5/named2.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -X named.lock -g -T mkeytimers=2/20/40 +-m record,size,mctx -c named.conf -d 99 -X named.lock -g -T mkeytimers=2/20/40 diff --git a/bin/tests/system/mkeys/ns6/named.args b/bin/tests/system/mkeys/ns6/named.args index 02f8f670f6..74ea7e0a81 100644 --- a/bin/tests/system/mkeys/ns6/named.args +++ b/bin/tests/system/mkeys/ns6/named.args @@ -1 +1 @@ --m record,size,mctx -T clienttest -c named.conf -d 99 -X named.lock -g -T mkeytimers=5/10/20 +-m record,size,mctx -c named.conf -d 99 -X named.lock -g -T mkeytimers=5/10/20 diff --git a/bin/tests/system/nsupdate/ns5/named.args b/bin/tests/system/nsupdate/ns5/named.args index 49cb45ebe6..6555b33a53 100644 --- a/bin/tests/system/nsupdate/ns5/named.args +++ b/bin/tests/system/nsupdate/ns5/named.args @@ -1 +1 @@ --D nsupdate-ns5 -m record,size,mctx -T clienttest -c named.conf -d 99 -X named.lock -g -U 4 -T fixedlocal +-D nsupdate-ns5 -m record,size,mctx -c named.conf -d 99 -X named.lock -g -U 4 -T fixedlocal diff --git 
a/bin/tests/system/nsupdate/ns6/named.args b/bin/tests/system/nsupdate/ns6/named.args index 75ca52915e..827afb9948 100644 --- a/bin/tests/system/nsupdate/ns6/named.args +++ b/bin/tests/system/nsupdate/ns6/named.args @@ -1 +1 @@ --D nsupdate-ns6 -m record,size,mctx -T clienttest -c named.conf -d 99 -X named.lock -g -U 4 -T fixedlocal +-D nsupdate-ns6 -m record,size,mctx -c named.conf -d 99 -X named.lock -g -U 4 -T fixedlocal diff --git a/bin/tests/system/nsupdate/tests.sh b/bin/tests/system/nsupdate/tests.sh index b73d1785bb..83922eda73 100755 --- a/bin/tests/system/nsupdate/tests.sh +++ b/bin/tests/system/nsupdate/tests.sh @@ -506,7 +506,6 @@ grep "add nsec3param.test. 0 IN TYPE65534 .# 6 000140000400" jp.out.ns3.$n > /de if [ $ret != 0 ] ; then echo_i "failed"; status=`expr $ret + $status`; fi - ret=0 echo_i "testing that rndc stop updates the master file" $NSUPDATE -k ns1/ddns.key < /dev/null || ret=1 @@ -514,16 +513,24 @@ server 10.53.0.1 ${PORT} update add updated4.example.nil. 600 A 10.10.10.3 send END +sleep 3 $PERL $SYSTEMTESTTOP/stop.pl --use-rndc --port ${CONTROLPORT} nsupdate ns1 +sleep 3 # Removing the journal file and restarting the server means # that the data served by the new server process are exactly # those dumped to the master file by "rndc stop". rm -f ns1/*jnl $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nsupdate ns1 -$DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd updated4.example.nil.\ - @10.53.0.1 a > dig.out.ns1 || status=1 -digcomp knowngood.ns1.afterstop dig.out.ns1 || ret=1 -[ $ret = 0 ] || { echo_i "failed"; status=1; } +for try in 0 1 2 3 4 5 6 7 8 9; do + iret=0 + $DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd \ + updated4.example.nil. 
@10.53.0.1 a > dig.out.ns1 || iret=1 + digcomp knowngood.ns1.afterstop dig.out.ns1 || iret=1 + [ "$iret" -eq 0 ] && break + sleep 1 +done +[ "$iret" -ne 0 ] && ret=1 +[ "$ret" -eq 0 ] || { echo_i "failed"; status=1; } ret=0 echo_i "check that 'nsupdate -l' with a missing keyfile reports the missing file" diff --git a/bin/tests/system/nzd2nzf/tests.sh b/bin/tests/system/nzd2nzf/tests.sh index ea013af48d..34ede6e4e7 100644 --- a/bin/tests/system/nzd2nzf/tests.sh +++ b/bin/tests/system/nzd2nzf/tests.sh @@ -61,9 +61,14 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nzd2nzf ns1 n=`expr $n + 1` echo_i "querying for zone data from migrated zone config ($n)" -ret=0 -$DIG $DIGOPTS @10.53.0.1 a.added.example a > dig.out.ns1.$n || ret=1 -grep 'status: NOERROR' dig.out.ns1.$n > /dev/null || ret=1 +# retry loop in case the server restart above causes transient failures +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.1 a.added.example a > dig.out.ns1.$n || ret=1 + grep 'status: NOERROR' dig.out.ns1.$n > /dev/null || ret=1 + [ "$ret" -eq 0 ] && break + sleep 1 +done n=`expr $n + 1` if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/bin/tests/system/pipelined/pipequeries.c b/bin/tests/system/pipelined/pipequeries.c index f04b7f0c04..8af1b8985e 100644 --- a/bin/tests/system/pipelined/pipequeries.c +++ b/bin/tests/system/pipelined/pipequeries.c @@ -277,7 +277,7 @@ main(int argc, char *argv[]) { RUNCHECK(dst_lib_init(mctx, NULL)); taskmgr = NULL; - RUNCHECK(isc_taskmgr_create(mctx, 1, 0, &taskmgr)); + RUNCHECK(isc_taskmgr_create(mctx, 1, 0, NULL, &taskmgr)); task = NULL; RUNCHECK(isc_task_create(taskmgr, 0, &task)); timermgr = NULL; diff --git a/bin/tests/system/resolver/ns7/named.args b/bin/tests/system/resolver/ns7/named.args deleted file mode 100644 index 1e4a9bf325..0000000000 --- a/bin/tests/system/resolver/ns7/named.args +++ /dev/null @@ -1,2 +0,0 @@ -# this server runs named with the "-T clienttest" 
option omitted --m record,size,mctx -c named.conf -d 99 -D resolver-ns7 -X named.lock -g diff --git a/bin/tests/system/resolver/ns7/named2.conf.in b/bin/tests/system/resolver/ns7/named2.conf.in index b966e783b7..787705984d 100644 --- a/bin/tests/system/resolver/ns7/named2.conf.in +++ b/bin/tests/system/resolver/ns7/named2.conf.in @@ -12,7 +12,7 @@ // NS7 options { - query-source address 10.53.0.7 port @PORT@ dscp 13; + query-source address 10.53.0.7 dscp 13; notify-source 10.53.0.7 dscp 14; transfer-source 10.53.0.7 dscp 15; port @PORT@; diff --git a/bin/tests/system/rndc/ns6/named.args b/bin/tests/system/rndc/ns6/named.args index 479c740f81..e876eb8930 100644 --- a/bin/tests/system/rndc/ns6/named.args +++ b/bin/tests/system/rndc/ns6/named.args @@ -1,3 +1,3 @@ # teardown of a huge zone with tracing enabled takes way too long # -m none is set so that stop.pl does not timeout --D rndc-ns6 -X named.lock -m none -T clienttest -c named.conf -d 99 -g -U 4 +-D rndc-ns6 -X named.lock -m none -c named.conf -d 99 -g -U 4 diff --git a/bin/tests/system/rpz/tests.sh b/bin/tests/system/rpz/tests.sh index 255779f785..88f74d0576 100644 --- a/bin/tests/system/rpz/tests.sh +++ b/bin/tests/system/rpz/tests.sh @@ -219,6 +219,7 @@ restart () { $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz ns$1 load_db dnsrps_loaded + sleep 1 } # $1=server and irrelevant args @@ -465,6 +466,7 @@ for mode in native dnsrps; do else echo_i "running DNSRPS sub-test" $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz + sleep 3 fi ;; esac diff --git a/bin/tests/system/rpzrecurse/tests.sh b/bin/tests/system/rpzrecurse/tests.sh index 763cc2b5f8..11160cacdf 100644 --- a/bin/tests/system/rpzrecurse/tests.sh +++ b/bin/tests/system/rpzrecurse/tests.sh @@ -135,6 +135,7 @@ for mode in native dnsrps; do else echo_i "running DNSRPS sub-test" $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpzrecurse + sleep 3 fi ;; esac diff --git a/bin/tests/system/start.pl 
b/bin/tests/system/start.pl index 869c1f0eb2..1ecbd70f22 100755 --- a/bin/tests/system/start.pl +++ b/bin/tests/system/start.pl @@ -257,7 +257,6 @@ sub construct_ns_command { $command .= "-D $test-$server "; $command .= "-X named.lock "; $command .= "-m record,size,mctx "; - $command .= "-T clienttest "; foreach my $t_option( "dropedns", "ednsformerr", "ednsnotimp", "ednsrefused", diff --git a/bin/tests/system/statistics/tests.sh b/bin/tests/system/statistics/tests.sh index ccbca44996..ce82d0d2a8 100644 --- a/bin/tests/system/statistics/tests.sh +++ b/bin/tests/system/statistics/tests.sh @@ -71,7 +71,7 @@ $RNDCCMD -s 10.53.0.3 stats > /dev/null 2>&1 [ -f ns3/named.stats ] || ret=1 if [ ! "$CYGWIN" ]; then nsock0nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'` - [ 0 -ne ${nsock0nstat:-0} ] || ret=1 + [ 0 -eq ${nsock0nstat:-0} ] || ret=1 fi if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` @@ -107,7 +107,7 @@ if [ ! "$CYGWIN" ]; then ret=0 echo_i "verifying active sockets output in named.stats ($n)" nsock1nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'` - [ `expr $nsock1nstat - $nsock0nstat` -eq 1 ] || ret=1 + [ `expr ${nsock1nstat:-0} - ${nsock0nstat:-0}` -eq 1 ] || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` n=`expr $n + 1` diff --git a/bin/tests/system/tcp/ans6/ans.py b/bin/tests/system/tcp/ans6/ans.py index 3debf19e20..331ac7fbd1 100644 --- a/bin/tests/system/tcp/ans6/ans.py +++ b/bin/tests/system/tcp/ans6/ans.py @@ -42,7 +42,7 @@ import time # Timeout for establishing all connections requested by a single 'open' command. 
OPEN_TIMEOUT = 2 - +VERSION_QUERY = b'\x00\x1e\xaf\xb8\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x07version\x04bind\x00\x00\x10\x00\x03' def log(msg): print(datetime.datetime.now().strftime('%d-%b-%Y %H:%M:%S.%f ') + msg) @@ -84,6 +84,7 @@ def open_connections(active_conns, count, host, port): log('%s for socket %s' % (errno.errorcode[err], sock)) errors.append(sock) else: + sock.send(VERSION_QUERY) active_conns.append(sock) if errors: diff --git a/bin/tests/system/tcp/tests.sh b/bin/tests/system/tcp/tests.sh index 3af9432031..faf2e1ba78 100644 --- a/bin/tests/system/tcp/tests.sh +++ b/bin/tests/system/tcp/tests.sh @@ -163,8 +163,12 @@ check_stats_limit() { assert_int_equal "${TCP_HIGH}" "${TCP_LIMIT}" "TCP high-water value" || return 1 } retry 2 check_stats_limit || ret=1 +close_connections $((TCP_LIMIT + 1)) if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status + ret)) +# wait for connections to close +sleep 5 + echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/bin/tests/system/tkey/keycreate.c b/bin/tests/system/tkey/keycreate.c index 9ccb24ff32..e8c8811655 100644 --- a/bin/tests/system/tkey/keycreate.c +++ b/bin/tests/system/tkey/keycreate.c @@ -236,7 +236,7 @@ main(int argc, char *argv[]) { RUNCHECK(dst_lib_init(mctx, NULL)); taskmgr = NULL; - RUNCHECK(isc_taskmgr_create(mctx, 1, 0, &taskmgr)); + RUNCHECK(isc_taskmgr_create(mctx, 1, 0, NULL, &taskmgr)); task = NULL; RUNCHECK(isc_task_create(taskmgr, 0, &task)); timermgr = NULL; diff --git a/bin/tests/system/tkey/keydelete.c b/bin/tests/system/tkey/keydelete.c index c96d38950c..bc3a8d5539 100644 --- a/bin/tests/system/tkey/keydelete.c +++ b/bin/tests/system/tkey/keydelete.c @@ -175,7 +175,7 @@ main(int argc, char **argv) { RUNCHECK(dst_lib_init(mctx, NULL)); taskmgr = NULL; - RUNCHECK(isc_taskmgr_create(mctx, 1, 0, &taskmgr)); + RUNCHECK(isc_taskmgr_create(mctx, 1, 0, NULL, &taskmgr)); task = NULL; RUNCHECK(isc_task_create(taskmgr, 0, &task)); timermgr = NULL; diff --git 
a/bin/tests/system/unknown/tests.sh b/bin/tests/system/unknown/tests.sh index 190b84020d..eeb8920ffa 100644 --- a/bin/tests/system/unknown/tests.sh +++ b/bin/tests/system/unknown/tests.sh @@ -122,16 +122,24 @@ do done echo_i "checking large unknown record loading on master" -ret=0 -$DIG $DIGOPTS @10.53.0.1 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.1 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` echo_i "checking large unknown record loading on slave" -ret=0 -$DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` @@ -139,10 +147,16 @@ echo_i "stop and restart slave" $PERL $SYSTEMTESTTOP/stop.pl unknown ns2 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns2 +# server may be answering queries before zones are loaded, +# so retry a few times if this query fails echo_i "checking large unknown record loading on slave" -ret=0 -$DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.2 +tcp 
+short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` @@ -157,10 +171,16 @@ echo_i "stop and restart inline slave" $PERL $SYSTEMTESTTOP/stop.pl unknown ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns3 +# server may be answering queries before zones are loaded, +# so retry a few times if this query fails echo_i "checking large unknown record loading on inline slave" -ret=0 -$DIG $DIGOPTS @10.53.0.3 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.3 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` diff --git a/bin/tests/system/upforwd/ns3/named.conf.in b/bin/tests/system/upforwd/ns3/named.conf.in index e440a1f0d2..d037e745e8 100644 --- a/bin/tests/system/upforwd/ns3/named.conf.in +++ b/bin/tests/system/upforwd/ns3/named.conf.in @@ -17,7 +17,7 @@ options { pid-file "named.pid"; listen-on { 10.53.0.3; }; listen-on-v6 { none; }; - recursion yes; + recursion no; notify yes; }; diff --git a/bin/tests/system/upforwd/tests.sh b/bin/tests/system/upforwd/tests.sh index b0694bbd5c..3b0d7b3998 100644 --- a/bin/tests/system/upforwd/tests.sh +++ b/bin/tests/system/upforwd/tests.sh @@ -21,8 +21,6 @@ DIGOPTS="+tcp +noadd +nosea +nostat +noquest +nocomm +nocmd -p ${PORT}" status=0 n=1 -sleep 5 - echo_i "waiting for servers to be ready for testing ($n)" for i in 1 2 3 4 5 6 7 8 9 10 do diff --git a/bin/tests/system/xfer/tests.sh b/bin/tests/system/xfer/tests.sh 
index 11a27cb364..13fc762c4a 100755 --- a/bin/tests/system/xfer/tests.sh +++ b/bin/tests/system/xfer/tests.sh @@ -431,11 +431,17 @@ $DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.1.$n grep "status: NOERROR," dig.out.1.$n > /dev/null || tmp=1 $PERL $SYSTEMTESTTOP/stop.pl xfer ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} xfer ns3 -$DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.2.$n -grep "status: NOERROR," dig.out.2.$n > /dev/null || tmp=1 -$DIG -p ${PORT} axfr mapped @10.53.0.3 > dig.out.3.$n -digcomp knowngood.mapped dig.out.3.$n || tmp=1 -if test $tmp != 0 ; then echo_i "failed"; fi +for try in 0 1 2 3 4 5 6 7 8 9; do + iret=0 + $DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.2.$n + grep "status: NOERROR," dig.out.2.$n > /dev/null || iret=1 + $DIG -p ${PORT} axfr mapped @10.53.0.3 > dig.out.3.$n + digcomp knowngood.mapped dig.out.3.$n || iret=1 + [ "$iret" -eq 0 ] && break + sleep 1 +done +[ "$iret" -eq 0 ] || tmp=1 +[ "$tmp" -ne 0 ] && echo_i "failed" status=`expr $status + $tmp` n=`expr $n + 1` diff --git a/bin/tools/mdig.c b/bin/tools/mdig.c index a4e6968bf8..bf889228e4 100644 --- a/bin/tools/mdig.c +++ b/bin/tools/mdig.c @@ -2047,7 +2047,7 @@ main(int argc, char *argv[]) { fatal("can't choose between IPv4 and IPv6"); taskmgr = NULL; - RUNCHECK(isc_taskmgr_create(mctx, 1, 0, &taskmgr)); + RUNCHECK(isc_taskmgr_create(mctx, 1, 0, NULL, &taskmgr)); task = NULL; RUNCHECK(isc_task_create(taskmgr, 0, &task)); timermgr = NULL; diff --git a/config.h.in b/config.h.in index 5f66011d53..88f46849b8 100644 --- a/config.h.in +++ b/config.h.in @@ -366,6 +366,9 @@ /* define if struct stat has st_mtim.tv_nsec field */ #undef HAVE_STAT_NSEC +/* Define to 1 if you have the header file. */ +#undef HAVE_STDALIGN_H + /* Define to 1 if you have the header file. 
*/ #undef HAVE_STDATOMIC_H diff --git a/configure b/configure index cd2b3540d5..1794f5113a 100755 --- a/configure +++ b/configure @@ -747,6 +747,8 @@ OPENSSL_LIBS OPENSSL_CFLAGS INSTALL_LIBRARY ALWAYS_DEFINES +LIBUV_LIBS +LIBUV_CFLAGS PTHREAD_CFLAGS PTHREAD_LIBS PTHREAD_CC @@ -848,6 +850,7 @@ infodir docdir oldincludedir includedir +runstatedir localstatedir sharedstatedir sysconfdir @@ -967,6 +970,8 @@ PKG_CONFIG_LIBDIR MAXMINDDB_CFLAGS MAXMINDDB_LIBS MAXMINDDB_PREFIX +LIBUV_CFLAGS +LIBUV_LIBS OPENSSL_CFLAGS OPENSSL_LIBS LIBXML2_CFLAGS @@ -1018,6 +1023,7 @@ datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' @@ -1270,6 +1276,15 @@ do | -silent | --silent | --silen | --sile | --sil) silent=yes ;; + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ @@ -1407,7 +1422,7 @@ fi for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir + libdir localedir mandir runstatedir do eval ac_val=\$$ac_var # Remove trailing slashes. 
@@ -1560,6 +1575,7 @@ Fine tuning of the installation directories: --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] @@ -1726,6 +1742,9 @@ Some influential environment variables: linker flags for MAXMINDDB, overriding pkg-config MAXMINDDB_PREFIX value of prefix for libmaxminddb, overriding pkg-config + LIBUV_CFLAGS + C compiler flags for LIBUV, overriding pkg-config + LIBUV_LIBS linker flags for LIBUV, overriding pkg-config OPENSSL_CFLAGS C compiler flags for OPENSSL, overriding pkg-config OPENSSL_LIBS @@ -4000,7 +4019,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -4046,7 +4065,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -4070,7 +4089,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. 
*/ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -4115,7 +4134,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -4139,7 +4158,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15768,6 +15787,154 @@ fi done +# libuv +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libuv" >&5 +$as_echo_n "checking for libuv... " >&6; } + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libuv >= 1.0.0" >&5 +$as_echo_n "checking for libuv >= 1.0.0... " >&6; } + +if test -n "$LIBUV_CFLAGS"; then + pkg_cv_LIBUV_CFLAGS="$LIBUV_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libuv >= 1.0.0\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libuv >= 1.0.0") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_LIBUV_CFLAGS=`$PKG_CONFIG --cflags "libuv >= 1.0.0" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$LIBUV_LIBS"; then + pkg_cv_LIBUV_LIBS="$LIBUV_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libuv >= 1.0.0\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libuv >= 1.0.0") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_LIBUV_LIBS=`$PKG_CONFIG --libs "libuv >= 1.0.0" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + LIBUV_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libuv >= 1.0.0" 2>&1` + else + LIBUV_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libuv >= 1.0.0" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$LIBUV_PKG_ERRORS" >&5 + + as_fn_error $? "libuv not found" "$LINENO" 5 +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + as_fn_error $? 
"libuv not found" "$LINENO" 5 +else + LIBUV_CFLAGS=$pkg_cv_LIBUV_CFLAGS + LIBUV_LIBS=$pkg_cv_LIBUV_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +fi + + + CCASFLAGS_libuv_ax_save_flags=$CCASFLAGS + + + + CFLAGS_libuv_ax_save_flags=$CFLAGS + + + + CPPFLAGS_libuv_ax_save_flags=$CPPFLAGS + + + + CXXFLAGS_libuv_ax_save_flags=$CXXFLAGS + + + + ERLCFLAGS_libuv_ax_save_flags=$ERLCFLAGS + + + + FCFLAGS_libuv_ax_save_flags=$FCFLAGS + + + + FCLIBS_libuv_ax_save_flags=$FCLIBS + + + + FFLAGS_libuv_ax_save_flags=$FFLAGS + + + + FLIBS_libuv_ax_save_flags=$FLIBS + + + + GCJFLAGS_libuv_ax_save_flags=$GCJFLAGS + + + + JAVACFLAGS_libuv_ax_save_flags=$JAVACFLAGS + + + + LDFLAGS_libuv_ax_save_flags=$LDFLAGS + + + + LIBS_libuv_ax_save_flags=$LIBS + + + + OBJCFLAGS_libuv_ax_save_flags=$OBJCFLAGS + + + + OBJCXXFLAGS_libuv_ax_save_flags=$OBJCXXFLAGS + + + + UPCFLAGS_libuv_ax_save_flags=$UPCFLAGS + + + + VALAFLAGS_libuv_ax_save_flags=$VALAFLAGS + + + + +CFLAGS="$CFLAGS $LIBUV_CFLAGS" +LIBS="$LIBS $LIBUV_LIBS" + # # flockfile is usually provided by pthreads # @@ -18149,7 +18316,7 @@ fi ;; #( esac if test "$GCC" = "yes"; then : - STD_CWARNINGS="$STD_CWARNINGS -W -Wall -Wmissing-prototypes -Wcast-qual -Wwrite-strings -Wformat -Wpointer-arith" + STD_CWARNINGS="$STD_CWARNINGS -W -Wall -Wmissing-prototypes -Wcast-qual -Wwrite-strings -Wformat -Wpointer-arith -Wno-missing-field-initializers" fi @@ -19672,6 +19839,19 @@ done LIBS="$LIBS $ISC_ATOMIC_LIBS" +for ac_header in stdalign.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "stdalign.h" "ac_cv_header_stdalign_h" "$ac_includes_default" +if test "x$ac_cv_header_stdalign_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_STDALIGN_H 1 +_ACEOF + +fi + +done + + for ac_header in uchar.h do : ac_fn_c_check_header_mongrel "$LINENO" "uchar.h" "ac_cv_header_uchar_h" "$ac_includes_default" @@ -23306,7 +23486,7 @@ ac_config_commands="$ac_config_commands chmod" # elsewhere if there's a good reason for 
doing so. # -ac_config_files="$ac_config_files make/Makefile make/mkdep Makefile bin/Makefile bin/check/Makefile bin/confgen/Makefile bin/confgen/unix/Makefile bin/delv/Makefile bin/dig/Makefile bin/dnssec/Makefile bin/named/Makefile bin/named/unix/Makefile bin/nsupdate/Makefile bin/pkcs11/Makefile bin/plugins/Makefile bin/python/Makefile bin/python/isc/Makefile bin/python/isc/utils.py bin/python/isc/tests/Makefile bin/python/dnssec-checkds.py bin/python/dnssec-coverage.py bin/python/dnssec-keymgr.py bin/python/isc/__init__.py bin/python/isc/checkds.py bin/python/isc/coverage.py bin/python/isc/dnskey.py bin/python/isc/eventlist.py bin/python/isc/keydict.py bin/python/isc/keyevent.py bin/python/isc/keymgr.py bin/python/isc/keyseries.py bin/python/isc/keyzone.py bin/python/isc/policy.py bin/python/isc/rndc.py bin/python/isc/tests/dnskey_test.py bin/python/isc/tests/policy_test.py bin/rndc/Makefile bin/tests/Makefile bin/tests/headerdep_test.sh bin/tests/optional/Makefile bin/tests/pkcs11/Makefile bin/tests/pkcs11/benchmarks/Makefile bin/tests/system/Makefile bin/tests/system/conf.sh bin/tests/system/dlzexternal/Makefile bin/tests/system/dlzexternal/ns1/dlzs.conf bin/tests/system/dyndb/Makefile bin/tests/system/dyndb/driver/Makefile bin/tests/system/pipelined/Makefile bin/tests/system/rndc/Makefile bin/tests/system/rpz/Makefile bin/tests/system/rsabigexponent/Makefile bin/tests/system/tkey/Makefile bin/tools/Makefile contrib/scripts/check-secure-delegation.pl contrib/scripts/zone-edit.sh doc/Makefile doc/arm/Makefile doc/arm/noteversion.xml doc/arm/pkgversion.xml doc/arm/releaseinfo.xml doc/doxygen/Doxyfile doc/doxygen/Makefile doc/doxygen/doxygen-input-filter doc/misc/Makefile doc/tex/Makefile doc/tex/armstyle.sty doc/xsl/Makefile doc/xsl/isc-docbook-chunk.xsl doc/xsl/isc-docbook-html.xsl doc/xsl/isc-manpage.xsl doc/xsl/isc-notes-html.xsl lib/Makefile lib/bind9/Makefile lib/bind9/include/Makefile lib/bind9/include/bind9/Makefile lib/dns/Makefile 
lib/dns/include/Makefile lib/dns/include/dns/Makefile lib/dns/include/dst/Makefile lib/dns/tests/Makefile lib/irs/Makefile lib/irs/include/Makefile lib/irs/include/irs/Makefile lib/irs/include/irs/netdb.h lib/irs/include/irs/platform.h lib/irs/tests/Makefile lib/isc/pthreads/Makefile lib/isc/pthreads/include/Makefile lib/isc/pthreads/include/isc/Makefile lib/isc/Makefile lib/isc/include/Makefile lib/isc/include/isc/Makefile lib/isc/include/isc/platform.h lib/isc/include/pk11/Makefile lib/isc/include/pkcs11/Makefile lib/isc/tests/Makefile lib/isc/unix/Makefile lib/isc/unix/include/Makefile lib/isc/unix/include/isc/Makefile lib/isccc/Makefile lib/isccc/include/Makefile lib/isccc/include/isccc/Makefile lib/isccc/tests/Makefile lib/isccfg/Makefile lib/isccfg/include/Makefile lib/isccfg/include/isccfg/Makefile lib/isccfg/tests/Makefile lib/ns/Makefile lib/ns/include/Makefile lib/ns/include/ns/Makefile lib/ns/tests/Makefile lib/samples/Makefile lib/samples/Makefile-postinstall unit/unittest.sh fuzz/Makefile" +ac_config_files="$ac_config_files make/Makefile make/mkdep Makefile bin/Makefile bin/check/Makefile bin/confgen/Makefile bin/confgen/unix/Makefile bin/delv/Makefile bin/dig/Makefile bin/dnssec/Makefile bin/named/Makefile bin/named/unix/Makefile bin/nsupdate/Makefile bin/pkcs11/Makefile bin/plugins/Makefile bin/python/Makefile bin/python/isc/Makefile bin/python/isc/utils.py bin/python/isc/tests/Makefile bin/python/dnssec-checkds.py bin/python/dnssec-coverage.py bin/python/dnssec-keymgr.py bin/python/isc/__init__.py bin/python/isc/checkds.py bin/python/isc/coverage.py bin/python/isc/dnskey.py bin/python/isc/eventlist.py bin/python/isc/keydict.py bin/python/isc/keyevent.py bin/python/isc/keymgr.py bin/python/isc/keyseries.py bin/python/isc/keyzone.py bin/python/isc/policy.py bin/python/isc/rndc.py bin/python/isc/tests/dnskey_test.py bin/python/isc/tests/policy_test.py bin/rndc/Makefile bin/tests/Makefile bin/tests/headerdep_test.sh bin/tests/optional/Makefile 
bin/tests/pkcs11/Makefile bin/tests/pkcs11/benchmarks/Makefile bin/tests/system/Makefile bin/tests/system/conf.sh bin/tests/system/dlzexternal/Makefile bin/tests/system/dlzexternal/ns1/dlzs.conf bin/tests/system/dyndb/Makefile bin/tests/system/dyndb/driver/Makefile bin/tests/system/pipelined/Makefile bin/tests/system/rndc/Makefile bin/tests/system/rpz/Makefile bin/tests/system/rsabigexponent/Makefile bin/tests/system/tkey/Makefile bin/tools/Makefile contrib/scripts/check-secure-delegation.pl contrib/scripts/zone-edit.sh doc/Makefile doc/arm/Makefile doc/arm/noteversion.xml doc/arm/pkgversion.xml doc/arm/releaseinfo.xml doc/doxygen/Doxyfile doc/doxygen/Makefile doc/doxygen/doxygen-input-filter doc/misc/Makefile doc/tex/Makefile doc/tex/armstyle.sty doc/xsl/Makefile doc/xsl/isc-docbook-chunk.xsl doc/xsl/isc-docbook-html.xsl doc/xsl/isc-manpage.xsl doc/xsl/isc-notes-html.xsl lib/Makefile lib/bind9/Makefile lib/bind9/include/Makefile lib/bind9/include/bind9/Makefile lib/dns/Makefile lib/dns/include/Makefile lib/dns/include/dns/Makefile lib/dns/include/dst/Makefile lib/dns/tests/Makefile lib/irs/Makefile lib/irs/include/Makefile lib/irs/include/irs/Makefile lib/irs/include/irs/netdb.h lib/irs/include/irs/platform.h lib/irs/tests/Makefile lib/isc/pthreads/Makefile lib/isc/pthreads/include/Makefile lib/isc/pthreads/include/isc/Makefile lib/isc/Makefile lib/isc/include/Makefile lib/isc/include/isc/Makefile lib/isc/include/isc/platform.h lib/isc/include/pk11/Makefile lib/isc/include/pkcs11/Makefile lib/isc/netmgr/Makefile lib/isc/tests/Makefile lib/isc/unix/Makefile lib/isc/unix/include/Makefile lib/isc/unix/include/isc/Makefile lib/isccc/Makefile lib/isccc/include/Makefile lib/isccc/include/isccc/Makefile lib/isccc/tests/Makefile lib/isccfg/Makefile lib/isccfg/include/Makefile lib/isccfg/include/isccfg/Makefile lib/isccfg/tests/Makefile lib/ns/Makefile lib/ns/include/Makefile lib/ns/include/ns/Makefile lib/ns/tests/Makefile lib/samples/Makefile 
lib/samples/Makefile-postinstall unit/unittest.sh fuzz/Makefile" # @@ -24406,6 +24586,7 @@ do "lib/isc/include/isc/platform.h") CONFIG_FILES="$CONFIG_FILES lib/isc/include/isc/platform.h" ;; "lib/isc/include/pk11/Makefile") CONFIG_FILES="$CONFIG_FILES lib/isc/include/pk11/Makefile" ;; "lib/isc/include/pkcs11/Makefile") CONFIG_FILES="$CONFIG_FILES lib/isc/include/pkcs11/Makefile" ;; + "lib/isc/netmgr/Makefile") CONFIG_FILES="$CONFIG_FILES lib/isc/netmgr/Makefile" ;; "lib/isc/tests/Makefile") CONFIG_FILES="$CONFIG_FILES lib/isc/tests/Makefile" ;; "lib/isc/unix/Makefile") CONFIG_FILES="$CONFIG_FILES lib/isc/unix/Makefile" ;; "lib/isc/unix/include/Makefile") CONFIG_FILES="$CONFIG_FILES lib/isc/unix/include/Makefile" ;; diff --git a/configure.ac b/configure.ac index 385d715182..f657d81562 100644 --- a/configure.ac +++ b/configure.ac @@ -641,6 +641,15 @@ AC_CHECK_FUNCS([pthread_setaffinity_np cpuset_setaffinity processor_bind sched_s AC_CHECK_FUNCS([pthread_setname_np pthread_set_name_np]) AC_CHECK_HEADERS([pthread_np.h], [], [], [#include ]) +# libuv +AC_MSG_CHECKING(for libuv) +PKG_CHECK_MODULES([LIBUV], [libuv >= 1.0.0], [], + [AC_MSG_ERROR([libuv not found])]) +AX_SAVE_FLAGS([libuv]) + +CFLAGS="$CFLAGS $LIBUV_CFLAGS" +LIBS="$LIBS $LIBUV_LIBS" + # # flockfile is usually provided by pthreads # @@ -1321,7 +1330,7 @@ AS_CASE([$host], [MKDEPCFLAGS="-xM"])]) AS_IF([test "$GCC" = "yes"], - [STD_CWARNINGS="$STD_CWARNINGS -W -Wall -Wmissing-prototypes -Wcast-qual -Wwrite-strings -Wformat -Wpointer-arith"] + [STD_CWARNINGS="$STD_CWARNINGS -W -Wall -Wmissing-prototypes -Wcast-qual -Wwrite-strings -Wformat -Wpointer-arith -Wno-missing-field-initializers"] ) AX_CHECK_COMPILE_FLAG([-fno-strict-aliasing], @@ -1796,6 +1805,8 @@ AC_CHECK_HEADERS( ]) LIBS="$LIBS $ISC_ATOMIC_LIBS" +AC_CHECK_HEADERS([stdalign.h]) + AC_CHECK_HEADERS([uchar.h]) # @@ -2817,6 +2828,7 @@ AC_CONFIG_FILES([ lib/isc/include/isc/platform.h lib/isc/include/pk11/Makefile lib/isc/include/pkcs11/Makefile + 
lib/isc/netmgr/Makefile lib/isc/tests/Makefile lib/isc/unix/Makefile lib/isc/unix/include/Makefile diff --git a/doc/arm/notes-feature-changes.xml b/doc/arm/notes-feature-changes.xml index f5ae078e88..f88b499fda 100644 --- a/doc/arm/notes-feature-changes.xml +++ b/doc/arm/notes-feature-changes.xml @@ -11,6 +11,16 @@
Feature Changes + + + A new asynchronous network communications system based on + libuv is now used by named + for listening for incoming requests and responding to them. + This change will make it easier to improve performance and + implement new protocol layers (for example, DNS over TLS) in + the future. [GL #29] + + named will now log a warning if diff --git a/doc/design/netmgr.md b/doc/design/netmgr.md new file mode 100644 index 0000000000..3b2c8420cd --- /dev/null +++ b/doc/design/netmgr.md @@ -0,0 +1,96 @@ +# Netmgr + +Netmgr (aka rainbow duck) is the new networking system for BIND. It's based +on libuv, although it does not expose any of the libuv API, in order to +keep the API agnostic of the underlying library. + +## A bit of history + +Networking in BIND9 up to 9.12 works with a single event loop (epoll() on +Linux, kqueue on FreeBSD, etc). + +When a client wants to read from a socket, it creates a socket event +associated with a task that will receive this event. An +`isc_socket_{read,write,etc.}` operation tries to read directly from +the socket; if it succeeds, it sends the socket event to the task +provided by the caller. If it doesn't, it adds an event to an event +loop, and when this event is received the listener is re-set, and an +internal task is launched to read the data from the socket. After the +internal task is done, it launches the task from the socket event provided +by the caller. This means that a simple socket operation causes a +lot of context switches. + +9.14 fixed some of these issues by having multiple event loops in separate +threads (one per CPU), that can read the data immediately and then call +the socket event, but this is still sub-optimal. + +## Basic concepts + +### `isc_nm_t` + +The `isc_nm_t` structure represents the network manager itself. It +contains a configurable number (generally the same as the number of CPUs) +of 'networker' objects, each of which represents a thread for executing +networking events.
+ +The manager contains flags to indicate whether it has been paused or +interlocked, and counters for the number of workers running and the +number of workers paused. + +Each networker object contains a queue of incoming asynchronous events +and a pool of buffers into which messages will be copied when received. + +### `isc_nmsocket_t` + +`isc_nmsocket_t` is a wrapper around a libuv socket. It is configured +with + +### `isc_nmhandle_t` + +An `isc_nmhandle_t` object represents an interface that can be read or +written. For TCP it's a socket, and for UDP it's a socket with a peer +address. It is always associated with one and only one `isc_nmsocket_t` +object. + +When a handle object is allocated, it may be allocated with a block of +'extra' space in which another object will be stored that is associated +with that handle: for example, an `ns_client_t` structure storing +information about an incoming request. + +The handle is reference counted; when references drop to zero it calls +the 'reset' callback for its associated object and places itself onto +a stack of inactive handles in its corresponding `isc_nmsocket_t` +structure so it can be quickly reused when the next incoming message +is received. When the handle is freed (which may happen if the socket's +inactive-handles stack is full or when the socket is destroyed) then the +associated object's 'put' callback will be called to free any resources +it allocated. + +## UDP listening + +UDP listener sockets automatically create an array of 'child' sockets, +each associated with one networker, and all listening on the same address +via `SO_REUSEADDR`. (The parent's reference counter is used for all the +parent and child sockets together; none are destroyed until there are no +remaining references to any of them.) 
+ +## TCP listening + +A TCP listener socket cannot listen on multiple threads in parallel, +so receiving a TCP connection can cause a context switch, but this is +expected to be rare enough not to impact performance significantly. + +When connected, a TCP socket will attach to the system-wide TCP clients +quota. + +## TCP listening for DNS + +A TCPDNS listener is a wrapper around a TCP socket which specifically +handles DNS traffic, including the two-byte length field that prepends DNS +messages over TCP. + +Other wrapper socket types can be added in the future, such as a TLS socket +wrapper to implement encryption or an HTTP wrapper to implement the HTTP +protocol. This will enable the system to have a transport-neutral network +manager socket over which DNS can be sent without knowing anything about +transport, encryption, etc. diff --git a/doc/dev/dev.md b/doc/dev/dev.md index d769d6f18b..7fbbc8ac86 100644 --- a/doc/dev/dev.md +++ b/doc/dev/dev.md @@ -654,10 +654,6 @@ Items can be removed from the list using `ISC_LIST_UNLINK`: ISC_LIST_UNLINK(foolist, foo, link); -A similar but smaller set of `ISC_QUEUE` macros, including `ISC_QUEUE_PUSH` -and `ISC_QUEUE_POP`, are provided to implement strict FIFO lists, with -built-in fine-grained locking. 
- #### Names The `dns_name` API has facilities for processing DNS names and labels, diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index c955d216dc..a3e28f3e12 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -598,7 +598,8 @@ deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) { dns_qid_t *qid; REQUIRE(disp->port_table != NULL); - REQUIRE(portentry != NULL && isc_refcount_current(&portentry->refs) > 0); + REQUIRE(portentry != NULL && + isc_refcount_current(&portentry->refs) > 0); if (isc_refcount_decrement(&portentry->refs) == 1) { qid = DNS_QID(disp); diff --git a/lib/dns/message.c b/lib/dns/message.c index 9c6292a9a3..31321b8644 100644 --- a/lib/dns/message.c +++ b/lib/dns/message.c @@ -1103,7 +1103,7 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, result = ISC_R_NOMEMORY; goto cleanup; } - rdataset = isc_mempool_get(msg->rdspool); + rdataset = isc_mempool_get(msg->rdspool); if (rdataset == NULL) { result = ISC_R_NOMEMORY; goto cleanup; diff --git a/lib/dns/tests/dnstest.c b/lib/dns/tests/dnstest.c index 539e6439e7..ad525cebe0 100644 --- a/lib/dns/tests/dnstest.c +++ b/lib/dns/tests/dnstest.c @@ -116,7 +116,7 @@ create_managers(void) { isc_result_t result; ncpus = isc_os_ncpus(); - CHECK(isc_taskmgr_create(dt_mctx, ncpus, 0, &taskmgr)); + CHECK(isc_taskmgr_create(dt_mctx, ncpus, 0, NULL, &taskmgr)); CHECK(isc_timermgr_create(dt_mctx, &timermgr)); CHECK(isc_socketmgr_create(dt_mctx, &socketmgr)); CHECK(isc_task_create(taskmgr, 0, &maintask)); diff --git a/lib/isc/Makefile.in b/lib/isc/Makefile.in index ae7ca482bf..906ebd3e69 100644 --- a/lib/isc/Makefile.in +++ b/lib/isc/Makefile.in @@ -46,18 +46,20 @@ WIN32OBJS = win32/condition.@O@ win32/dir.@O@ win32/errno.@O@ \ # Alphabetically OBJS = pk11.@O@ pk11_result.@O@ \ - aes.@O@ app.@O@ assertions.@O@ \ + aes.@O@ app.@O@ assertions.@O@ astack.@O@ \ backtrace.@O@ base32.@O@ base64.@O@ \ bind9.@O@ buffer.@O@ bufferlist.@O@ \ commandline.@O@ counter.@O@ 
crc64.@O@ error.@O@ entropy.@O@ \ - event.@O@ hash.@O@ ht.@O@ heap.@O@ hex.@O@ hmac.@O@ \ - httpd.@O@ iterated_hash.@O@ \ + event.@O@ hash.@O@ ht.@O@ heap.@O@ hex.@O@ \ + hmac.@O@ hp.@O@ httpd.@O@ iterated_hash.@O@ \ lex.@O@ lfsr.@O@ lib.@O@ log.@O@ \ md.@O@ mem.@O@ mutexblock.@O@ \ + netmgr/netmgr.@O@ netmgr/tcp.@O@ netmgr/udp.@O@ \ + netmgr/tcpdns.@O@ netmgr/uverr2result.@O@ \ netaddr.@O@ netscope.@O@ nonce.@O@ openssl_shim.@O@ pool.@O@ \ - parseint.@O@ portset.@O@ quota.@O@ radix.@O@ random.@O@ \ - ratelimiter.@O@ region.@O@ regex.@O@ result.@O@ \ - rwlock.@O@ \ + parseint.@O@ portset.@O@ queue.@O@ quota.@O@ \ + radix.@O@ random.@O@ ratelimiter.@O@ \ + region.@O@ regex.@O@ result.@O@ rwlock.@O@ \ serial.@O@ siphash.@O@ sockaddr.@O@ stats.@O@ \ string.@O@ symtab.@O@ task.@O@ taskpool.@O@ \ tm.@O@ timer.@O@ version.@O@ \ @@ -66,15 +68,15 @@ SYMTBLOBJS = backtrace-emptytbl.@O@ # Alphabetically SRCS = pk11.c pk11_result.c \ - aes.c app.c assertions.c \ + aes.c app.c assertions.c astack.c \ backtrace.c base32.c base64.c bind9.c \ buffer.c bufferlist.c commandline.c counter.c crc64.c \ - entropy.c error.c event.c hash.c ht.c heap.c hex.c hmac.c \ - httpd.c iterated_hash.c \ + entropy.c error.c event.c hash.c ht.c heap.c \ + hex.c hmac.c hp.c httpd.c iterated_hash.c \ lex.c lfsr.c lib.c log.c \ md.c mem.c mutexblock.c \ netaddr.c netscope.c nonce.c openssl_shim.c pool.c \ - parseint.c portset.c quota.c radix.c random.c \ + parseint.c portset.c queue.c quota.c radix.c random.c \ ratelimiter.c region.c regex.c result.c rwlock.c \ serial.c siphash.c sockaddr.c stats.c string.c \ symtab.c task.c taskpool.c timer.c \ @@ -86,7 +88,7 @@ LIBS = ${OPENSSL_LIBS} @LIBS@ # Attempt to disable parallel processing. 
.NOTPARALLEL: .NO_PARALLEL: -SUBDIRS = include unix pthreads +SUBDIRS = include netmgr unix pthreads TARGETS = timestamp TESTDIRS = @UNITTESTS@ diff --git a/lib/isc/astack.c b/lib/isc/astack.c new file mode 100644 index 0000000000..05d94365d4 --- /dev/null +++ b/lib/isc/astack.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +struct isc_astack { + isc_mem_t *mctx; + size_t size; + size_t pos; + isc_mutex_t lock; + uintptr_t nodes[]; +}; + +isc_astack_t * +isc_astack_new(isc_mem_t *mctx, size_t size) { + isc_astack_t *stack = + isc_mem_get(mctx, + sizeof(isc_astack_t) + size * sizeof(uintptr_t)); + + stack->mctx = NULL; + isc_mem_attach(mctx, &stack->mctx); + stack->size = size; + stack->pos = 0; + memset(stack->nodes, 0, size * sizeof(uintptr_t)); + isc_mutex_init(&stack->lock); + return (stack); +} + +bool +isc_astack_trypush(isc_astack_t *stack, void *obj) { + if (isc_mutex_trylock(&stack->lock) == false) { + if (stack->pos >= stack->size) { + isc_mutex_unlock(&stack->lock); + return (false); + } + stack->nodes[stack->pos++] = (uintptr_t) obj; + isc_mutex_unlock(&stack->lock); + return (true); + } else { + return (false); + } +} + +void * +isc_astack_pop(isc_astack_t *stack) { + isc_mutex_lock(&stack->lock); + uintptr_t rv; + if (stack->pos == 0) { + rv = 0; + } else { + rv = stack->nodes[--stack->pos]; + } + isc_mutex_unlock(&stack->lock); + return ((void*) rv); +} + +void +isc_astack_destroy(isc_astack_t *stack) { + REQUIRE(stack->pos == 0); + + isc_mem_putanddetach(&stack->mctx, stack, + sizeof(struct isc_astack) + 
+ stack->size * sizeof(uintptr_t)); +} diff --git a/lib/isc/hp.c b/lib/isc/hp.c new file mode 100644 index 0000000000..4d14a54c08 --- /dev/null +++ b/lib/isc/hp.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/* + * Hazard Pointer implementation. + * + * This work is based on C++ code available from: + * https://github.com/pramalhe/ConcurrencyFreaks/ + * + * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Concurrency Freaks nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include + +#define HP_MAX_THREADS 128 +#define HP_MAX_HPS 4 /* This is named 'K' in the HP paper */ +#define CLPAD (128 / sizeof(uintptr_t)) +#define HP_THRESHOLD_R 0 /* This is named 'R' in the HP paper */ + +/* Maximum number of retired objects per thread */ +#define MAX_RETIRED (HP_MAX_THREADS * HP_MAX_HPS) + +#define TID_UNKNOWN -1 + +static atomic_int_fast32_t tid_v_base; +static bool tid_v_initialized; + +#if defined(HAVE_TLS) +#if defined(HAVE_THREAD_LOCAL) +#include +static thread_local int tid_v = TID_UNKNOWN; +#elif defined(HAVE___THREAD) +static __thread int tid_v = TID_UNKNOWN; +#elif defined(HAVE___DECLSPEC_THREAD) +static __declspec( thread ) int tid_v = TID_UNKNOWN; +#else /* if defined(HAVE_THREAD_LOCAL) */ +#error "Unknown method for defining a TLS variable!" +#endif /* if defined(HAVE_THREAD_LOCAL) */ +#else /* if defined(HAVE_TLS) */ +#error "Thread-local storage support is required!" 
+#endif /* if defined(HAVE_TLS) */ + +typedef struct retirelist { + int size; + uintptr_t list[MAX_RETIRED]; +} retirelist_t; + +struct isc_hp { + int max_hps; + isc_mem_t *mctx; + atomic_uintptr_t *hp[HP_MAX_THREADS]; + retirelist_t *rl[HP_MAX_THREADS]; + isc_hp_deletefunc_t *deletefunc; +}; + +static inline int +tid() { + if (!tid_v_initialized) { + atomic_init(&tid_v_base, 0); + tid_v_initialized = true; + } + if (tid_v == TID_UNKNOWN) { + tid_v = atomic_fetch_add(&tid_v_base, 1); + REQUIRE(tid_v < HP_MAX_THREADS); + } + + return (tid_v); +} + +isc_hp_t * +isc_hp_new(isc_mem_t *mctx, size_t max_hps, isc_hp_deletefunc_t *deletefunc) { + isc_hp_t *hp = isc_mem_get(mctx, sizeof(*hp)); + + if (max_hps == 0) { + max_hps = HP_MAX_HPS; + } + + *hp = (isc_hp_t){ + .max_hps = max_hps, + .deletefunc = deletefunc + }; + + isc_mem_attach(mctx, &hp->mctx); + + for (int i = 0; i < HP_MAX_THREADS; i++) { + hp->hp[i] = isc_mem_get(mctx, CLPAD * 2 * sizeof(hp->hp[i][0])); + hp->rl[i] = isc_mem_get(mctx, sizeof(*hp->rl[0])); + *hp->rl[i] = (retirelist_t) { .size = 0 }; + + for (int j = 0; j < hp->max_hps; j++) { + atomic_init(&hp->hp[i][j], 0); + } + } + + return (hp); +} + +void +isc_hp_destroy(isc_hp_t *hp) { + for (int i = 0; i < HP_MAX_THREADS; i++) { + isc_mem_put(hp->mctx, hp->hp[i], + CLPAD * 2 * sizeof(uintptr_t)); + + for (int j = 0; j < hp->rl[i]->size; j++) { + void *data = (void *)hp->rl[i]->list[j]; + hp->deletefunc(data); + } + + isc_mem_put(hp->mctx, hp->rl[i], sizeof(*hp->rl[0])); + } + + isc_mem_putanddetach(&hp->mctx, hp, sizeof(*hp)); +} + +void +isc_hp_clear(isc_hp_t *hp) { + for (int i = 0; i < hp->max_hps; i++) { + atomic_store_release(&hp->hp[tid()][i], 0); + } +} + +void isc_hp_clear_one(isc_hp_t *hp, int ihp) { + atomic_store_release(&hp->hp[tid()][ihp], 0); +} + +uintptr_t +isc_hp_protect(isc_hp_t *hp, int ihp, atomic_uintptr_t *atom) { + uintptr_t n = 0; + uintptr_t ret; + while ((ret = atomic_load(atom)) != n) { + atomic_store(&hp->hp[tid()][ihp], 
ret); + n = ret; + } + return (ret); +} + +uintptr_t +isc_hp_protect_ptr(isc_hp_t *hp, int ihp, atomic_uintptr_t ptr) { + atomic_store(&hp->hp[tid()][ihp], atomic_load(&ptr)); + return (atomic_load(&ptr)); +} + +uintptr_t +isc_hp_protect_release(isc_hp_t *hp, int ihp, atomic_uintptr_t ptr) { + atomic_store_release(&hp->hp[tid()][ihp], atomic_load(&ptr)); + return (atomic_load(&ptr)); +} + +void +isc_hp_retire(isc_hp_t *hp, uintptr_t ptr) { + hp->rl[tid()]->list[hp->rl[tid()]->size++] = ptr; + INSIST(hp->rl[tid()]->size < MAX_RETIRED); + + if (hp->rl[tid()]->size < HP_THRESHOLD_R) { + return; + } + + for (int iret = 0; iret < hp->rl[tid()]->size; iret++) { + uintptr_t obj = hp->rl[tid()]->list[iret]; + bool can_delete = true; + for (int itid = 0; + itid < HP_MAX_THREADS && can_delete; + itid++) + { + for (int ihp = hp->max_hps-1; ihp >= 0; ihp--) { + if (atomic_load(&hp->hp[itid][ihp]) == obj) { + can_delete = false; + break; + } + } + } + + if (can_delete) { + size_t bytes = (hp->rl[tid()]->size - iret) * + sizeof(hp->rl[tid()]->list[0]); + memmove(&hp->rl[tid()]->list[iret], + &hp->rl[tid()]->list[iret + 1], + bytes); + hp->rl[tid()]->size--; + hp->deletefunc((void *)obj); + } + } +} diff --git a/lib/isc/include/isc/Makefile.in b/lib/isc/include/isc/Makefile.in index a78f2c6d0b..620c8c308d 100644 --- a/lib/isc/include/isc/Makefile.in +++ b/lib/isc/include/isc/Makefile.in @@ -18,12 +18,12 @@ VERSION=@BIND9_VERSION@ # machine generated. The latter are handled specially in the # install target below. 
# -HEADERS = aes.h app.h assertions.h atomic.h backtrace.h \ +HEADERS = aes.h app.h assertions.h astack.h atomic.h backtrace.h \ base32.h base64.h bind9.h buffer.h bufferlist.h \ commandline.h counter.h crc64.h deprecated.h \ endian.h errno.h error.h event.h eventclass.h \ file.h formatcheck.h fsaccess.h fuzz.h \ - hash.h heap.h hex.h hmac.h ht.h httpd.h \ + hash.h heap.h hex.h hmac.h hp.h ht.h httpd.h \ interfaceiter.h iterated_hash.h \ lang.h lex.h lfsr.h lib.h likely.h list.h log.h \ magic.h md.h mem.h meminfo.h mutexblock.h \ diff --git a/lib/isc/include/isc/astack.h b/lib/isc/include/isc/astack.h new file mode 100644 index 0000000000..e0ea66ca2b --- /dev/null +++ b/lib/isc/include/isc/astack.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include +#include + +isc_astack_t * +isc_astack_new(isc_mem_t *mctx, size_t size); +/*%< + * Allocate and initialize a new array stack of size 'size'. + */ + +void +isc_astack_destroy(isc_astack_t *stack); +/*%< + * Free an array stack 'stack'. + * + * Requires: + * \li 'stack' is empty. + */ + +bool +isc_astack_trypush(isc_astack_t *stack, void *obj); +/*%< + * Try to push 'obj' onto array stack 'stack'. On failure, either + * because the stack size limit has been reached or because another + * thread currently holds the stack's lock, return 'false'. + */ + +void * +isc_astack_pop(isc_astack_t *stack); +/*%< + * Pop an object off of array stack 'stack'. If the stack is empty, + * return NULL. 
+ */ diff --git a/lib/isc/include/isc/hp.h b/lib/isc/include/isc/hp.h new file mode 100644 index 0000000000..3cd542bd02 --- /dev/null +++ b/lib/isc/include/isc/hp.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/* + * Hazard Pointer implementation. + * + * This work is based on C++ code available from: + * https://github.com/pramalhe/ConcurrencyFreaks/ + * + * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Concurrency Freaks nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +/*% + * Hazard pointers are a mechanism for protecting objects in memory + * from being deleted by other threads while in use. This allows + * safe lock-free data structures. + * + * This is an adaptation of the ConcurrencyFreaks implementation in C. + * More details available at https://github.com/pramalhe/ConcurrencyFreaks, + * in the file HazardPointers.hpp. + */ + +typedef void +(isc_hp_deletefunc_t)(void *); + +isc_hp_t * +isc_hp_new(isc_mem_t *mctx, size_t max_hps, isc_hp_deletefunc_t *deletefunc); +/*%< + * Create a new hazard pointer array of size 'max_hps' (or a reasonable + * default value if 'max_hps' is 0). The function 'deletefunc' will be + * used to delete objects protected by hazard pointers when it becomes + * safe to retire them. + */ + +void +isc_hp_destroy(isc_hp_t *hp); +/*%< + * Destroy a hazard pointer array and clean up all objects protected + * by hazard pointers. + */ + +void +isc_hp_clear(isc_hp_t *hp); +/*%< + * Clear all hazard pointers in the array for the current thread. + * + * Progress condition: wait-free bounded (by max_hps) + */ + +void +isc_hp_clear_one(isc_hp_t *hp, int ihp); +/*%< + * Clear a specified hazard pointer in the array for the current thread. + * + * Progress condition: wait-free population oblivious. 
+ */ + +uintptr_t +isc_hp_protect(isc_hp_t *hp, int ihp, atomic_uintptr_t *atom); +/*%< + * Protect an object referenced by 'atom' with a hazard pointer for the + * current thread. + * + * Progress condition: lock-free. + */ + +uintptr_t +isc_hp_protect_ptr(isc_hp_t *hp, int ihp, atomic_uintptr_t ptr); +/*%< + * This returns the same value that is passed as ptr, which is sometimes + * useful. + * + * Progress condition: wait-free population oblivious. + */ + +uintptr_t +isc_hp_protect_release(isc_hp_t *hp, int ihp, atomic_uintptr_t ptr); +/*%< + * Same as isc_hp_protect_ptr(), but explicitly uses memory_order_release. + * + * Progress condition: wait-free population oblivious. + */ + +void +isc_hp_retire(isc_hp_t *hp, uintptr_t ptr); +/*%< + * Retire an object that is no longer in use by any thread, calling + * the delete function that was specified in isc_hp_new(). + * + * Progress condition: wait-free bounded (by the number of threads squared) + */ diff --git a/lib/isc/include/isc/mutexatomic.h b/lib/isc/include/isc/mutexatomic.h index c111081ee9..94d8998822 100644 --- a/lib/isc/include/isc/mutexatomic.h +++ b/lib/isc/include/isc/mutexatomic.h @@ -93,6 +93,11 @@ typedef struct atomic_uint_fast64 { uint64_t v; } atomic_uint_fast64_t; +typedef struct atomic_uintptr { + isc_mutex_t m; + uintptr_t v; +} atomic_uintptr_t; + typedef struct atomic_bool_s { isc_mutex_t m; bool v; @@ -198,3 +203,14 @@ typedef struct atomic_bool_s { atomic_compare_exchange_weak_explicit(obj, expected, desired, \ memory_order_seq_cst, \ memory_order_seq_cst) +#define atomic_exchange_explicit(obj, desired, order) \ + ({ \ + typeof((obj)->v) ___v; \ + REQUIRE(isc_mutex_lock(&(obj)->m) == ISC_R_SUCCESS); \ + ___v = (obj)->v; \ + (obj)->v = desired; \ + REQUIRE(isc_mutex_unlock(&(obj)->m) == ISC_R_SUCCESS); \ + ___v; \ + }) +#define atomic_exchange(obj, desired) \ + atomic_exchange_explicit(obj, desired, memory_order_seq_cst) diff --git a/lib/isc/include/isc/netmgr.h 
b/lib/isc/include/isc/netmgr.h new file mode 100644 index 0000000000..e68bed759c --- /dev/null +++ b/lib/isc/include/isc/netmgr.h @@ -0,0 +1,284 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#pragma once + +#include + +#include +#include +#include + +typedef enum { + NMEV_READ, + NMEV_WRITE, + NMEV_ACCEPT, + NMEV_CONNECTED, + NMEV_CANCELLED, + NMEV_SHUTDOWN +} isc_nm_eventtype; + +isc_nm_t * +isc_nm_start(isc_mem_t *mctx, uint32_t workers); +/*%< + * Creates a new network manager with 'workers' worker threads, + * and starts it running. + */ + +void +isc_nm_attach(isc_nm_t *mgr, isc_nm_t **dst); +void +isc_nm_detach(isc_nm_t **mgr0); +void +isc_nm_destroy(isc_nm_t **mgr0); +/*%< + * Attach/detach a network manager. When all references have been + * released, the network manager is shut down, freeing all resources. + * Destroy is working the same way as detach, but it actively waits + * for all other references to be gone. + */ + +/* Return thread id of current thread, or ISC_NETMGR_TID_UNKNOWN */ +int +isc_nm_tid(void); + +/* + * isc_nm_freehandle frees a handle, releasing resources + */ +void +isc_nm_freehandle(isc_nmhandle_t *handle); + +void +isc_nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target); +/*%< + * isc_nmsocket_attach attaches to a socket, increasing refcount + */ + +void +isc_nmsocket_close(isc_nmsocket_t *sock); + +void +isc_nmsocket_detach(isc_nmsocket_t **socketp); +/*%< + * isc_nmsocket_detach detaches from socket, decreasing refcount + * and possibly destroying the socket if it's no longer referenced. 
+ */ + +void +isc_nmhandle_ref(isc_nmhandle_t *handle); +void +isc_nmhandle_unref(isc_nmhandle_t *handle); +/*%< + * Increment/decrement the reference counter in a netmgr handle, + * but (unlike the attach/detach functions) do not change the pointer + * value. If reference counters drop to zero, the handle can be + * marked inactive, possibly triggering deletion of its associated + * socket. + * + * (This will be used to prevent a client from being cleaned up when + * it's passed to an isc_task event handler. The libuv code would not + * otherwise know that the handle was in use and might free it, along + * with the client.) + */ + +void * +isc_nmhandle_getdata(isc_nmhandle_t *handle); + +void * +isc_nmhandle_getextra(isc_nmhandle_t *handle); + +typedef void (*isc_nm_opaquecb)(void *arg); + +bool +isc_nmhandle_is_stream(isc_nmhandle_t *handle); + +/* + * isc_nmhandle_t has a void * opaque field (usually - ns_client_t). + * We reuse handle and `opaque` can also be reused between calls. + * This function sets this field and two callbacks: + * - doreset resets the `opaque` to initial state + * - dofree frees everything associated with `opaque` + */ +void +isc_nmhandle_setdata(isc_nmhandle_t *handle, void *arg, + isc_nm_opaquecb doreset, isc_nm_opaquecb dofree); + +isc_sockaddr_t +isc_nmhandle_peeraddr(isc_nmhandle_t *handle); +isc_sockaddr_t +isc_nmhandle_localaddr(isc_nmhandle_t *handle); + +typedef void (*isc_nm_recv_cb_t)(isc_nmhandle_t *handle, isc_region_t *region, + void *cbarg); +/*%< + * Callback function to be used when receiving a packet. + * + * 'handle' the handle that can be used to send back the answer. + * 'region' contains the received data. It will be freed after + * return by caller. + * 'cbarg' the callback argument passed to isc_nm_listenudp(), + * isc_nm_listentcpdns(), or isc_nm_read(). 
+ */ + +typedef void (*isc_nm_cb_t)(isc_nmhandle_t *handle, isc_result_t result, + void *cbarg); +/*%< + * Callback function for other network completion events (send, connect, + * accept). + * + * 'handle' the handle on which the event took place. + * 'result' the result of the event. + * 'cbarg' the callback argument passed to isc_nm_send(), + * isc_nm_tcp_connect(), or isc_nm_listentcp() + */ + +isc_result_t +isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, + isc_nm_recv_cb_t cb, void *cbarg, + size_t extrasize, isc_nmsocket_t **sockp); +/*%< + * Start listening for UDP packets on interface 'iface' using net manager + * 'mgr'. + * + * On success, 'sockp' will be updated to contain a new listening UDP socket. + * + * When a packet is received on the socket, 'cb' will be called with 'cbarg' + * as its argument. + * + * When handles are allocated for the socket, 'extrasize' additional bytes + * will be allocated along with the handle for an associated object + * (typically ns_client). + */ + +void +isc_nm_udp_stoplistening(isc_nmsocket_t *sock); +/*%< + * Stop listening for UDP packets on socket 'sock'. + */ + +void +isc_nm_pause(isc_nm_t *mgr); +/*%< + * Pause all processing, equivalent to taskmgr exclusive tasks. + * It won't return until all workers have been paused. + */ + +void +isc_nm_resume(isc_nm_t *mgr); +/*%< + * Resume paused processing. It will return immediately + * after signalling workers to resume. + */ + +isc_result_t +isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg); + +isc_result_t +isc_nm_pauseread(isc_nmsocket_t *sock); +/*%< + * Pause reading on this socket, while still remembering the callback. + */ + +isc_result_t +isc_nm_resumeread(isc_nmsocket_t *sock); +/*%< + * Resume reading from socket. + * + * Requires: + * \li 'sock' is a valid netmgr socket + * \li ...for which a read/recv callback has been defined. 
+ */ + +isc_result_t +isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg); +/*%< + * Send the data in 'region' via 'handle'. Afterward, the callback 'cb' is + * called with the argument 'cbarg'. + * + * 'region' is not copied; it has to be allocated beforehand and freed + * in 'cb'. + */ + +isc_result_t +isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, + isc_nm_cb_t cb, void *cbarg, + size_t extrahandlesize, isc_quota_t *quota, + isc_nmsocket_t **rv); +/*%< + * Start listening for raw messages over the TCP interface 'iface', using + * net manager 'mgr'. + * + * On success, 'rv' will be updated to contain a new listening TCP + * socket. + * + * When a message is received on the socket, 'cb' will be called with 'cbarg' + * as its argument. + * + * When handles are allocated for the socket, 'extrahandlesize' additional bytes + * will be allocated along with the handle for an associated object. + * + * If 'quota' is not NULL, then the socket is attached to the specified + * quota. This allows us to enforce TCP client quota limits. + * + * NOTE: This is currently only called inside isc_nm_listentcpdns(), which + * creates a 'wrapper' socket that sends and receives DNS messages - + * prepended with a two-byte length field - and handles buffering. + */ + +void +isc_nm_tcp_stoplistening(isc_nmsocket_t *sock); +/*%< + * Stop listening on TCP socket 'sock'. + */ + +isc_result_t +isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, + isc_nm_recv_cb_t cb, void *arg, + size_t extrahandlesize, isc_quota_t *quota, + isc_nmsocket_t **sockp); +/*%< + * Start listening for DNS messages over the TCP interface 'iface', using + * net manager 'mgr'. + * + * On success, 'sockp' will be updated to contain a new listening TCPDNS + * socket. This is a wrapper around a TCP socket, and handles DNS length + * processing. + * + * When a complete DNS message is received on the socket, 'cb' will be + * called with 'arg' as its argument. 
+ * + * When handles are allocated for the socket, 'extrahandlesize' additional bytes + * will be allocated along with the handle for an associated object + * (typically ns_client). + */ + +void +isc_nm_tcpdns_stoplistening(isc_nmsocket_t *sock); +/*%< + * Stop listening on TCPDNS socket 'sock'. + */ + +void +isc_nm_tcpdns_sequential(isc_nmhandle_t *handle); +/*%< + * Disable pipelining on this connection. Each DNS packet + * will only be processed after the previous one completes. + * + * This cannot be reversed once set for a given connection. + */ + +void +isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp); +/*%< + * Simulate a broken firewall that blocks UDP messages larger + * than a given size. + */ diff --git a/lib/isc/include/isc/queue.h b/lib/isc/include/isc/queue.h index 43080a8134..6e3b8a3e4e 100644 --- a/lib/isc/include/isc/queue.h +++ b/lib/isc/include/isc/queue.h @@ -9,153 +9,46 @@ * information regarding copyright ownership. */ +#pragma once +#include -/* - * This is a generic implementation of a two-lock concurrent queue. - * There are built-in mutex locks for the head and tail of the queue, - * allowing elements to be safely added and removed at the same time. +typedef struct isc_queue isc_queue_t; + +isc_queue_t * +isc_queue_new(isc_mem_t *mctx, int max_threads); +/*%< + * Create a new fetch-and-add array queue. * - * NULL is "end of list" - * -1 is "not linked" + * 'max_threads' is currently unused. In the future it can be used + * to pass a maximum threads parameter when creating hazard pointers, + * but currently `isc_hp_t` uses a hard-coded value. 
*/ -#ifndef ISC_QUEUE_H -#define ISC_QUEUE_H 1 - -#include - -#include -#include - -#ifdef ISC_QUEUE_CHECKINIT -#define ISC_QLINK_INSIST(x) ISC_INSIST(x) -#else -#define ISC_QLINK_INSIST(x) (void)0 -#endif - -#define ISC_QLINK(type) struct { type *prev, *next; } - -#define ISC_QLINK_INIT(elt, link) \ - do { \ - (elt)->link.next = (elt)->link.prev = (void *)(-1); \ - } while(0) - -#define ISC_QLINK_LINKED(elt, link) ((void*)(elt)->link.next != (void*)(-1)) - -#define ISC_QUEUE(type) struct { \ - type *head, *tail; \ - isc_mutex_t headlock, taillock; \ -} - -#define ISC_QUEUE_INIT(queue, link) \ - do { \ - isc_mutex_init(&(queue).taillock); \ - isc_mutex_init(&(queue).headlock); \ - (queue).tail = (queue).head = NULL; \ - } while (0) - -#define ISC_QUEUE_EMPTY(queue) ((queue).head == NULL) - -#define ISC_QUEUE_DESTROY(queue) \ - do { \ - ISC_QLINK_INSIST(ISC_QUEUE_EMPTY(queue)); \ - isc_mutex_destroy(&(queue).taillock); \ - isc_mutex_destroy(&(queue).headlock); \ - } while (0) - -/* - * queues are meant to separate the locks at either end. For best effect, that - * means keeping the ends separate - i.e. non-empty queues work best. - * - * a push to an empty queue has to take the pop lock to update - * the pop side of the queue. - * Popping the last entry has to take the push lock to update - * the push side of the queue. - * - * The order is (pop, push), because a pop is presumably in the - * latency path and a push is when we're done. - * - * We do an MT hot test in push to see if we need both locks, so we can - * acquire them in order. Hopefully that makes the case where we get - * the push lock and find we need the pop lock (and have to release it) rare. - * - * > 1 entry - no collision, push works on one end, pop on the other - * 0 entry - headlock race - * pop wins - return(NULL), push adds new as both head/tail - * push wins - updates head/tail, becomes 1 entry case. 
- * 1 entry - taillock race - * pop wins - return(pop) sets head/tail NULL, becomes 0 entry case - * push wins - updates {head,tail}->link.next, pop updates head - * with new ->link.next and doesn't update tail +void +isc_queue_enqueue(isc_queue_t *queue, uintptr_t item); +/*%< + * Enqueue an object pointer 'item' at the tail of the queue. * + * Requires: + * \li 'item' is not null. */ -#define ISC_QUEUE_PUSH(queue, elt, link) \ - do { \ - bool headlocked = false; \ - ISC_QLINK_INSIST(!ISC_QLINK_LINKED(elt, link)); \ - if ((queue).head == NULL) { \ - LOCK(&(queue).headlock); \ - headlocked = true; \ - } \ - LOCK(&(queue).taillock); \ - if ((queue).tail == NULL && !headlocked) { \ - UNLOCK(&(queue).taillock); \ - LOCK(&(queue).headlock); \ - LOCK(&(queue).taillock); \ - headlocked = true; \ - } \ - (elt)->link.prev = (queue).tail; \ - (elt)->link.next = NULL; \ - if ((queue).tail != NULL) \ - (queue).tail->link.next = (elt); \ - (queue).tail = (elt); \ - UNLOCK(&(queue).taillock); \ - if (headlocked) { \ - if ((queue).head == NULL) \ - (queue).head = (elt); \ - UNLOCK(&(queue).headlock); \ - } \ - } while (0) -#define ISC_QUEUE_POP(queue, link, ret) \ - do { \ - LOCK(&(queue).headlock); \ - ret = (queue).head; \ - while (ret != NULL) { \ - if (ret->link.next == NULL) { \ - LOCK(&(queue).taillock); \ - if (ret->link.next == NULL) { \ - (queue).head = (queue).tail = NULL; \ - UNLOCK(&(queue).taillock); \ - break; \ - }\ - UNLOCK(&(queue).taillock); \ - } \ - (queue).head = ret->link.next; \ - (queue).head->link.prev = NULL; \ - break; \ - } \ - UNLOCK(&(queue).headlock); \ - if (ret != NULL) \ - (ret)->link.next = (ret)->link.prev = (void *)(-1); \ - } while(0) +uintptr_t +isc_queue_dequeue(isc_queue_t *queue); +/*%< + * Remove an object pointer from the head of the queue and return the + * pointer. If the queue is empty, return `nulluintptr` (the uintptr_t + * representation of NULL). + * + * Requires: + * \li 'queue' is not null. 
+ */ -#define ISC_QUEUE_UNLINK(queue, elt, link) \ - do { \ - ISC_QLINK_INSIST(ISC_QLINK_LINKED(elt, link)); \ - LOCK(&(queue).headlock); \ - LOCK(&(queue).taillock); \ - if ((elt)->link.prev == NULL) \ - (queue).head = (elt)->link.next; \ - else \ - (elt)->link.prev->link.next = (elt)->link.next; \ - if ((elt)->link.next == NULL) \ - (queue).tail = (elt)->link.prev; \ - else \ - (elt)->link.next->link.prev = (elt)->link.prev; \ - UNLOCK(&(queue).taillock); \ - UNLOCK(&(queue).headlock); \ - (elt)->link.next = (elt)->link.prev = (void *)(-1); \ - } while(0) - -#endif /* ISC_QUEUE_H */ +void +isc_queue_destroy(isc_queue_t *queue); +/*%< + * Destroy a queue. + * + * Requires: + * \li 'queue' is not null. + */ diff --git a/lib/isc/include/isc/result.h b/lib/isc/include/isc/result.h index 96f5f2bdff..a3fbeb047a 100644 --- a/lib/isc/include/isc/result.h +++ b/lib/isc/include/isc/result.h @@ -9,7 +9,6 @@ * information regarding copyright ownership. */ - #ifndef ISC_RESULT_H #define ISC_RESULT_H 1 diff --git a/lib/isc/include/isc/sockaddr.h b/lib/isc/include/isc/sockaddr.h index 478e77c727..1588ba9645 100644 --- a/lib/isc/include/isc/sockaddr.h +++ b/lib/isc/include/isc/sockaddr.h @@ -230,6 +230,10 @@ isc_sockaddr_frompath(isc_sockaddr_t *sockaddr, const char *path); * \li ISC_R_SUCCESS */ +isc_result_t +isc_sockaddr_fromsockaddr(isc_sockaddr_t *isa, const struct sockaddr *sa); + + #define ISC_SOCKADDR_FORMATSIZE \ sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:XXX.XXX.XXX.XXX%SSSSSSSSSS#YYYYY") /*%< diff --git a/lib/isc/include/isc/socket.h b/lib/isc/include/isc/socket.h index 7291387338..8d0c621ddf 100644 --- a/lib/isc/include/isc/socket.h +++ b/lib/isc/include/isc/socket.h @@ -524,7 +524,7 @@ isc_result_t isc_socket_filter(isc_socket_t *sock, const char *filter); /*%< * Inform the kernel that it should perform accept filtering. - * If filter is NULL the current filter will be removed.:w + * If filter is NULL the current filter will be removed. 
*/ isc_result_t diff --git a/lib/isc/include/isc/task.h b/lib/isc/include/isc/task.h index 10e8948268..40fa7db29c 100644 --- a/lib/isc/include/isc/task.h +++ b/lib/isc/include/isc/task.h @@ -78,6 +78,7 @@ #include #include #include +#include #include #define ISC_TASKEVENT_FIRSTEVENT (ISC_EVENTCLASS_TASK + 0) @@ -544,6 +545,8 @@ isc_task_beginexclusive(isc_task_t *task); * task. Waits for any other concurrently executing tasks to finish their * current event, and prevents any new events from executing in any of the * tasks sharing a task manager with 'task'. + * It also pauses processing of network events in netmgr if it was provided + * when taskmgr was created. * * The exclusive access must be relinquished by calling * isc_task_endexclusive() before returning from the current event handler. @@ -568,6 +571,22 @@ isc_task_endexclusive(isc_task_t *task); * exclusive access by calling isc_task_spl(). */ +void +isc_task_pause(isc_task_t *task0); +void +isc_task_unpause(isc_task_t *task0); +/*%< + * Pause/unpause this task. Pausing a task removes it from the ready + * queue if it is present there; this ensures that the task will not + * run again until unpaused. This is necessary when the libuv network + * thread executes a function which schedules task manager events; this + * prevents the task manager from executing the next event in a task + * before the network thread has finished. + * + * Requires: + *\li 'task' is a valid task, and is not already paused or shutting down. + */ + void isc_task_getcurrenttime(isc_task_t *task, isc_stdtime_t *t); void @@ -633,7 +652,8 @@ isc_taskmgr_createinctx(isc_mem_t *mctx, isc_taskmgr_t **managerp); isc_result_t isc_taskmgr_create(isc_mem_t *mctx, unsigned int workers, - unsigned int default_quantum, isc_taskmgr_t **managerp); + unsigned int default_quantum, + isc_nm_t *nm, isc_taskmgr_t **managerp); /*%< * Create a new task manager. 
isc_taskmgr_createinctx() also associates * the new manager with the specified application context. @@ -650,6 +670,9 @@ isc_taskmgr_create(isc_mem_t *mctx, unsigned int workers, * quantum value when tasks are created. If zero, then an implementation * defined default quantum will be used. * + *\li If 'nm' is set then netmgr is paused when an exclusive task mode + * is requested. + * * Requires: * *\li 'mctx' is a valid memory context. diff --git a/lib/isc/include/isc/types.h b/lib/isc/include/isc/types.h index 168076daf6..3d1eb779b7 100644 --- a/lib/isc/include/isc/types.h +++ b/lib/isc/include/isc/types.h @@ -30,6 +30,7 @@ /* Core Types. Alphabetized by defined type. */ +typedef struct isc_astack isc_astack_t; /*%< Array-based fast stack */ typedef struct isc_appctx isc_appctx_t; /*%< Application context */ typedef struct isc_backtrace_symmap isc_backtrace_symmap_t; /*%< Symbol Table Entry */ typedef struct isc_buffer isc_buffer_t; /*%< Buffer */ @@ -43,6 +44,8 @@ typedef ISC_LIST(isc_event_t) isc_eventlist_t; /*%< Event List */ typedef unsigned int isc_eventtype_t; /*%< Event Type */ typedef uint32_t isc_fsaccess_t; /*%< FS Access */ typedef struct isc_hash isc_hash_t; /*%< Hash */ +typedef struct isc_hp isc_hp_t; /*%< Hazard + pointer */ typedef struct isc_httpd isc_httpd_t; /*%< HTTP client */ typedef void (isc_httpdfree_t)(isc_buffer_t *, void *); /*%< HTTP free function */ typedef struct isc_httpdmgr isc_httpdmgr_t; /*%< HTTP manager */ @@ -59,6 +62,10 @@ typedef struct isc_logmodule isc_logmodule_t; /*%< Log Module */ typedef struct isc_mem isc_mem_t; /*%< Memory */ typedef struct isc_mempool isc_mempool_t; /*%< Memory Pool */ typedef struct isc_netaddr isc_netaddr_t; /*%< Net Address */ +typedef struct isc_nm isc_nm_t; /*%< Network manager */ +typedef struct isc_nmsocket isc_nmsocket_t; /*%< Network manager socket */ +typedef struct isc_nmiface isc_nmiface_t; /*%< Network manager interface. 
*/ +typedef struct isc_nmhandle isc_nmhandle_t; /*%< Network manager handle */ typedef struct isc_portset isc_portset_t; /*%< Port Set */ typedef struct isc_quota isc_quota_t; /*%< Quota */ typedef struct isc_ratelimiter isc_ratelimiter_t; /*%< Rate Limiter */ diff --git a/lib/isc/include/pk11/result.h b/lib/isc/include/pk11/result.h index 21f4a37529..e5f8102dba 100644 --- a/lib/isc/include/pk11/result.h +++ b/lib/isc/include/pk11/result.h @@ -16,6 +16,7 @@ #include #include +#include /* * Nothing in this file truly depends on , but the diff --git a/lib/isc/netmgr/Makefile.in b/lib/isc/netmgr/Makefile.in new file mode 100644 index 0000000000..d931b7c26d --- /dev/null +++ b/lib/isc/netmgr/Makefile.in @@ -0,0 +1,33 @@ +# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. + +srcdir = @srcdir@ +VPATH = @srcdir@ +top_srcdir = @top_srcdir@ + +CINCLUDES = -I${srcdir}/../include \ + -I${srcdir}/../unix/include \ + -I${srcdir}/../pthreads/include \ + -I${srcdir}/.. \ + ${OPENSSL_CFLAGS} \ + ${JSON_C_CFLAGS} \ + ${LIBXML2_CFLAGS} + +CDEFINES = +CWARNINGS = + +# Alphabetically +OBJS = netmgr.@O@ tcp.@O@ udp.@O@ tcpdns.@O@ uverr2result.@O@ + +# Alphabetically +SRCS = netmgr.c tcp.c udp.c tcpdns.c uverr2result.c + +TARGETS = ${OBJS} + +@BIND9_MAKE_RULES@ diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h new file mode 100644 index 0000000000..b95be9873b --- /dev/null +++ b/lib/isc/netmgr/netmgr-int.h @@ -0,0 +1,547 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ISC_NETMGR_TID_UNKNOWN -1 +#define ISC_NETMGR_TID_NOTLS -2 + +/* + * Single network event loop worker. + */ +typedef struct isc__networker { + isc_nm_t * mgr; + int id; /* thread id */ + uv_loop_t loop; /* libuv loop structure */ + uv_async_t async; /* async channel to send + * data to this networker */ + isc_mutex_t lock; + isc_mempool_t *mpool_bufs; + isc_condition_t cond; + bool paused; + bool finished; + isc_thread_t thread; + isc_queue_t *ievents; /* incoming async events */ + isc_refcount_t references; + atomic_int_fast64_t pktcount; + char udprecvbuf[65536]; + bool udprecvbuf_inuse; +} isc__networker_t; + +/* + * A general handle for a connection bound to a networker. For UDP + * connections we have peer address here, so both TCP and UDP can be + * handled with a simple send-like function + */ +#define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D') +#define VALID_NMHANDLE(t) ISC_MAGIC_VALID(t, \ + NMHANDLE_MAGIC) + +typedef void (*isc__nm_closecb)(isc_nmhandle_t *); + +struct isc_nmhandle { + int magic; + isc_refcount_t references; + + /* + * The socket is not 'attached' in the traditional + * reference-counting sense. Instead, we keep all handles in an + * array in the socket object. This way, we don't have circular + * dependencies and we can close all handles when we're destroying + * the socket. + */ + isc_nmsocket_t *sock; + size_t ah_pos; /* Position in the socket's + * 'active handles' array */ + + /* + * The handle is 'inflight' if netmgr is not currently processing + * it in any way - it might mean that e.g. 
a recursive resolution + * is happening. For an inflight handle we must wait for the + * calling code to finish before we can free it. + */ + atomic_bool inflight; + + isc_sockaddr_t peer; + isc_sockaddr_t local; + isc_nm_opaquecb doreset; /* reset extra callback, external */ + isc_nm_opaquecb dofree; /* free extra callback, external */ + void * opaque; + char extra[]; +}; + +/* + * An interface - an address we can listen on. + */ +struct isc_nmiface { + isc_sockaddr_t addr; +}; + +typedef enum isc__netievent_type { + netievent_stop, + netievent_udplisten, + netievent_udpstoplisten, + netievent_udpsend, + netievent_udprecv, + netievent_tcpconnect, + netievent_tcpsend, + netievent_tcprecv, + netievent_tcpstartread, + netievent_tcppauseread, + netievent_tcplisten, + netievent_tcpstoplisten, + netievent_tcpclose, +} isc__netievent_type; + +typedef struct isc__netievent_stop { + isc__netievent_type type; +} isc__netievent_stop_t; + +/* + * We have to split it because we can read and write on a socket + * simultaneously. + */ +typedef union { + isc_nm_recv_cb_t recv; + isc_nm_cb_t accept; +} isc__nm_readcb_t; + +typedef union { + isc_nm_cb_t send; + isc_nm_cb_t connect; +} isc__nm_writecb_t; + +typedef union { + isc_nm_recv_cb_t recv; + isc_nm_cb_t accept; + isc_nm_cb_t send; + isc_nm_cb_t connect; +} isc__nm_cb_t; + +/* + * Wrapper around uv_req_t with 'our' fields in it. req->data should + * always point to its parent. 
Note that we always allocate more than + * sizeof(struct) because we make room for different req types; + */ +#define UVREQ_MAGIC ISC_MAGIC('N', 'M', 'U', 'R') +#define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC) + +typedef struct isc__nm_uvreq { + int magic; + isc_nmsocket_t * sock; + isc_nmhandle_t * handle; + uv_buf_t uvbuf; /* translated isc_region_t, to be + sent or received */ + isc_sockaddr_t local; /* local address */ + isc_sockaddr_t peer; /* peer address */ + isc__nm_cb_t cb; /* callback */ + void * cbarg; /* callback argument */ + union { + uv_req_t req; + uv_getaddrinfo_t getaddrinfo; + uv_getnameinfo_t getnameinfo; + uv_shutdown_t shutdown; + uv_write_t write; + uv_connect_t connect; + uv_udp_send_t udp_send; + uv_fs_t fs; + uv_work_t work; + } uv_req; +} isc__nm_uvreq_t; + +typedef struct isc__netievent__socket { + isc__netievent_type type; + isc_nmsocket_t *sock; +} isc__netievent__socket_t; + +typedef isc__netievent__socket_t isc__netievent_udplisten_t; +typedef isc__netievent__socket_t isc__netievent_udpstoplisten_t; +typedef isc__netievent__socket_t isc__netievent_tcpstoplisten_t; +typedef isc__netievent__socket_t isc__netievent_tcpclose_t; +typedef isc__netievent__socket_t isc__netievent_startread_t; +typedef isc__netievent__socket_t isc__netievent_pauseread_t; +typedef isc__netievent__socket_t isc__netievent_resumeread_t; + +typedef struct isc__netievent__socket_req { + isc__netievent_type type; + isc_nmsocket_t *sock; + isc__nm_uvreq_t *req; +} isc__netievent__socket_req_t; + +typedef isc__netievent__socket_req_t isc__netievent_tcpconnect_t; +typedef isc__netievent__socket_req_t isc__netievent_tcplisten_t; +typedef isc__netievent__socket_req_t isc__netievent_tcpsend_t; + +typedef struct isc__netievent_udpsend { + isc__netievent_type type; + isc_nmsocket_t *sock; + isc_sockaddr_t peer; + isc__nm_uvreq_t *req; +} isc__netievent_udpsend_t; + +typedef struct isc__netievent { + isc__netievent_type type; +} isc__netievent_t; + +typedef union { + 
isc__netievent_t ni; + isc__netievent_stop_t nis; + isc__netievent_udplisten_t niul; + isc__netievent_udpsend_t nius; +} isc__netievent_storage_t; + +/* + * Network manager + */ +#define NM_MAGIC ISC_MAGIC('N', 'E', 'T', 'M') +#define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC) + +struct isc_nm { + int magic; + isc_refcount_t references; + isc_mem_t *mctx; + uint32_t nworkers; + isc_mutex_t lock; + isc_condition_t wkstatecond; + isc__networker_t *workers; + atomic_uint_fast32_t workers_running; + atomic_uint_fast32_t workers_paused; + atomic_uint_fast32_t maxudp; + atomic_bool paused; + + /* + * A worker is actively waiting for other workers, for example to + * stop listening; that means no other thread can do the same thing + * or pause, or we'll deadlock. We have to either re-enqueue our + * event or wait for the other one to finish if we want to pause. + */ + atomic_bool interlocked; +}; + +typedef enum isc_nmsocket_type { + isc_nm_udpsocket, + isc_nm_udplistener, /* Aggregate of nm_udpsocks */ + isc_nm_tcpsocket, + isc_nm_tcplistener, + isc_nm_tcpdnslistener, + isc_nm_tcpdnssocket +} isc_nmsocket_type; + +/*% + * A universal structure for either a single socket or a group of + * dup'd/SO_REUSE_PORT-using sockets listening on the same interface. + */ +#define NMSOCK_MAGIC ISC_MAGIC('N', 'M', 'S', 'K') +#define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC) + +struct isc_nmsocket { + /*% Unlocked, RO */ + int magic; + int tid; + isc_nmsocket_type type; + isc_nm_t *mgr; + isc_nmsocket_t *parent; + isc_quota_t *quota; + bool overquota; + + /*% outer socket is for 'wrapped' sockets - e.g. 
tcpdns in tcp */ + isc_nmsocket_t *outer; + + /*% server socket for connections */ + isc_nmsocket_t *server; + + /*% children sockets for multi-socket setups */ + isc_nmsocket_t *children; + int nchildren; + isc_nmiface_t *iface; + isc_nmhandle_t *tcphandle; + + /*% extra data allocated at the end of each isc_nmhandle_t */ + size_t extrahandlesize; + + /*% libuv data */ + uv_os_sock_t fd; + union uv_any_handle uv_handle; + + isc_sockaddr_t peer; + + /* Atomic */ + /*% Number of running (e.g. listening) children sockets */ + atomic_int_fast32_t rchildren; + + /*% + * Socket if active if it's listening, working, etc., if we're + * closing a socket it doesn't make any sense to e.g. still + * push handles or reqs for reuse + */ + atomic_bool active; + atomic_bool destroying; + + /*% + * Socket is closed if it's not active and all the possible + * callbacks were fired, there are no active handles, etc. + * active==false, closed==false means the socket is closing. + */ + atomic_bool closed; + atomic_bool listening; + isc_refcount_t references; + + /*% + * TCPDNS socket is not pipelining. + */ + atomic_bool sequential; + /*% + * TCPDNS socket in sequential mode is currently processing a packet, + * we need to wait until it finishes. + */ + atomic_bool processing; + + /*% + * 'spare' handles for that can be reused to avoid allocations, + * for UDP. + */ + isc_astack_t *inactivehandles; + isc_astack_t *inactivereqs; + + /* Used for active/rchildren during shutdown */ + isc_mutex_t lock; + isc_condition_t cond; + + /*% + * List of active handles. + * ah_size - size of ah_frees and ah_handles + * ah_cpos - current position in ah_frees; + * ah_handles - array of *handles. + * Adding a handle + * - if ah_cpos == ah_size, realloc + * - x = ah_frees[ah_cpos] + * - ah_frees[ah_cpos++] = 0; + * - ah_handles[x] = handle + * - x must be stored with the handle! 
+ * Removing a handle: + * - ah_frees[--ah_cpos] = x + * - ah_handles[x] = NULL; + * + * XXXWPK for now this is locked with socket->lock, but we might want + * to change it to something lockless + */ + size_t ah_size; + size_t ah_cpos; + size_t *ah_frees; + isc_nmhandle_t **ah_handles; + + /* Buffer for TCPDNS processing, optional */ + size_t buf_size; + size_t buf_len; + unsigned char *buf; + + isc__nm_readcb_t rcb; + void *rcbarg; +}; + +bool +isc__nm_in_netthread(void); +/*% + * Returns 'true' if we're in the network thread. + */ + +void * +isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type); +/*%< + * Allocate an ievent and set the type. + */ + +void +isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event); +/*%< + * Enqueue an ievent onto a specific worker queue. (This is the only safe + * way to use an isc__networker_t from another thread.) + */ + +void +isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf); +/*%< + * Allocator for recv operations. + * + * Note that as currently implemented, this doesn't actually + * allocate anything, it just assigns the isc__networker's UDP + * receive buffer to a socket, and marks it as "in use". + */ + +void +isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf); +/*%< + * Free a buffer allocated for a receive operation. + * + * Note that as currently implemented, this doesn't actually + * free anything, it just marks the isc__networker's UDP receive buffer + * as "not in use". + */ + + +isc_nmhandle_t * +isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, + isc_sockaddr_t *local); +/*%< + * Get a handle for the socket 'sock', allocating a new one + * if there isn't one available in 'sock->inactivehandles'. + * + * If 'peer' is not NULL, set the handle's peer address to 'peer', + * otherwise set it to 'sock->peer'. + * + * If 'local' is not NULL, set the handle's local address to 'local', + * otherwise set it to 'sock->iface->addr'.
+ */ + +isc__nm_uvreq_t * +isc__nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock); +/*%< + * Get a UV request structure for the socket 'sock', allocating a + * new one if there isn't one available in 'sock->inactivereqs'. + */ + +void +isc__nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock); +/*%< + * Completes the use of a UV request structure, setting '*req' to NULL. + * + * The UV request is pushed onto the 'sock->inactivereqs' stack or, + * if that doesn't work, freed. + */ + +void +isc__nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, + isc_nmsocket_type type); +/*%< + * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type'. + */ + +void +isc__nmsocket_prep_destroy(isc_nmsocket_t *sock); +/*%< + * Mark 'sock' as inactive, close it if necessary, and destroy it + * if there are no remaining references or active handles. + */ + +isc_result_t +isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg); +/*%< + * Back-end implementation of isc_nm_send() for UDP handles. + */ + +void +isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ievent0); + +void +isc__nm_async_udpstoplisten(isc__networker_t *worker, + isc__netievent_t *ievent0); +void +isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ievent0); +/*%< + * Callback handlers for asynchronous UDP events (listen, stoplisten, send). + */ + +isc_result_t +isc__nm_tcp_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg); +/*%< + * Back-end implementation of isc_nm_send() for TCP handles. + */ + +void +isc__nm_tcp_close(isc_nmsocket_t *sock); +/*%< + * Close a TCP socket.
+ */ + +void +isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ievent0); +void +isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ievent0); +void +isc__nm_async_tcpstoplisten(isc__networker_t *worker, + isc__netievent_t *ievent0); +void +isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ievent0); +void +isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ievent0); +void +isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ievent0); +void +isc__nm_async_resumeread(isc__networker_t *worker, isc__netievent_t *ievent0); +void +isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ievent0); +/*%< + * Callback handlers for asynchronous TCP events (connect, listen, + * stoplisten, send, read, pauseread, resumeread, close). + */ + + +isc_result_t +isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg); +/*%< + * Back-end implementation of isc_nm_send() for TCPDNS handles. + */ + +void +isc__nm_tcpdns_close(isc_nmsocket_t *sock); +/*%< + * Close a TCPDNS socket. + */ + +#define isc__nm_uverr2result(x) \ + isc___nm_uverr2result(x, true, __FILE__, __LINE__) +isc_result_t +isc___nm_uverr2result(int uverr, bool dolog, + const char *file, unsigned int line); +/*%< + * Convert a libuv error value into an isc_result_t. The + * list of supported error values is not complete; new users + * of this function should add any expected errors that are + * not already there. + */ + +bool +isc__nm_acquire_interlocked(isc_nm_t *mgr); +/*%< + * Try to acquire interlocked state; return true if successful. + */ + +void +isc__nm_drop_interlocked(isc_nm_t *mgr); +/*%< + * Drop interlocked state; signal waiters. + */ + +void +isc__nm_acquire_interlocked_force(isc_nm_t *mgr); +/*%< + * Actively wait for interlocked state.
+ */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c new file mode 100644 index 0000000000..962bc1af03 --- /dev/null +++ b/lib/isc/netmgr/netmgr.c @@ -0,0 +1,1056 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netmgr-int.h" + +/* + * libuv is not thread safe, but has mechanisms to pass messages + * between threads. Each socket is owned by a thread. For UDP + * sockets we have a set of sockets for each interface and we can + * choose a sibling and send the message directly. For TCP, or if + * we're calling from a non-networking thread, we need to pass the + * request using async_cb. + */ + +#if defined(HAVE_TLS) +#if defined(HAVE_THREAD_LOCAL) +#include +static thread_local int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN; +#elif defined(HAVE___THREAD) +static __thread int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN; +#elif defined(HAVE___DECLSPEC_THREAD) +static __declspec( thread ) int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN; +#else /* if defined(HAVE_THREAD_LOCAL) */ +#error "Unknown method for defining a TLS variable!" 
+#endif /* if defined(HAVE_THREAD_LOCAL) */ +#else /* if defined(HAVE_TLS) */ +static int isc__nm_tid_v = ISC_NETMGR_TID_NOTLS; +#endif /* if defined(HAVE_TLS) */ + +static void +nmsocket_maybe_destroy(isc_nmsocket_t *sock); +static void +nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle); +static void * +nm_thread(void *worker0); +static void +async_cb(uv_async_t *handle); + +int +isc_nm_tid() { + return (isc__nm_tid_v); +} + +bool +isc__nm_in_netthread() { + return (isc__nm_tid_v >= 0); +} + +isc_nm_t * +isc_nm_start(isc_mem_t *mctx, uint32_t workers) { + isc_nm_t *mgr = NULL; + char name[32]; + + mgr = isc_mem_get(mctx, sizeof(*mgr)); + *mgr = (isc_nm_t) { + .nworkers = workers + }; + + isc_mem_attach(mctx, &mgr->mctx); + isc_mutex_init(&mgr->lock); + isc_condition_init(&mgr->wkstatecond); + isc_refcount_init(&mgr->references, 1); + atomic_init(&mgr->workers_running, 0); + atomic_init(&mgr->workers_paused, 0); + atomic_init(&mgr->maxudp, 0); + atomic_init(&mgr->paused, false); + atomic_init(&mgr->interlocked, false); + + mgr->workers = isc_mem_get(mctx, workers * sizeof(isc__networker_t)); + for (size_t i = 0; i < workers; i++) { + int r; + isc__networker_t *worker = &mgr->workers[i]; + *worker = (isc__networker_t) { + .mgr = mgr, + .id = i, + }; + + r = uv_loop_init(&worker->loop); + RUNTIME_CHECK(r == 0); + + worker->loop.data = &mgr->workers[i]; + + r = uv_async_init(&worker->loop, &worker->async, async_cb); + RUNTIME_CHECK(r == 0); + + isc_mutex_init(&worker->lock); + isc_condition_init(&worker->cond); + + isc_mempool_create(mgr->mctx, 65536, &worker->mpool_bufs); + worker->ievents = isc_queue_new(mgr->mctx, 128); + + /* + * We need to do this here and not in nm_thread to avoid a + * race - we could exit isc_nm_start, launch nm_destroy, + * and nm_thread would still not be up. 
+ */ + atomic_fetch_add_explicit(&mgr->workers_running, 1, + memory_order_relaxed); + isc_thread_create(nm_thread, &mgr->workers[i], &worker->thread); + + snprintf(name, sizeof(name), "isc-net-%04zu", i); + isc_thread_setname(worker->thread, name); + } + + mgr->magic = NM_MAGIC; + return (mgr); +} + +/* + * Free the resources of the network manager. + * + * TODO we need to clean up properly - launch all missing callbacks, + * destroy all listeners, etc. + */ +static void +nm_destroy(isc_nm_t **mgr0) { + REQUIRE(VALID_NM(*mgr0)); + REQUIRE(!isc__nm_in_netthread()); + + isc_nm_t *mgr = *mgr0; + + LOCK(&mgr->lock); + mgr->magic = 0; + + for (size_t i = 0; i < mgr->nworkers; i++) { + isc__netievent_t *event = NULL; + + LOCK(&mgr->workers[i].lock); + mgr->workers[i].finished = true; + UNLOCK(&mgr->workers[i].lock); + event = isc__nm_get_ievent(mgr, netievent_stop); + isc__nm_enqueue_ievent(&mgr->workers[i], event); + } + + while (atomic_load(&mgr->workers_running) > 0) { + WAIT(&mgr->wkstatecond, &mgr->lock); + } + UNLOCK(&mgr->lock); + + for (size_t i = 0; i < mgr->nworkers; i++) { + /* Empty the async event queue */ + isc__netievent_t *ievent; + while ((ievent = (isc__netievent_t *) + isc_queue_dequeue(mgr->workers[i].ievents)) != NULL) + { + isc_mem_put(mgr->mctx, ievent, + sizeof(isc__netievent_storage_t)); + } + isc_queue_destroy(mgr->workers[i].ievents); + isc_mempool_destroy(&mgr->workers[i].mpool_bufs); + } + + isc_condition_destroy(&mgr->wkstatecond); + isc_mutex_destroy(&mgr->lock); + isc_mem_put(mgr->mctx, mgr->workers, + mgr->nworkers * sizeof(isc__networker_t)); + isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(*mgr)); + *mgr0 = NULL; +} + +void +isc_nm_pause(isc_nm_t *mgr) { + REQUIRE(VALID_NM(mgr)); + REQUIRE(!isc__nm_in_netthread()); + + atomic_store(&mgr->paused, true); + isc__nm_acquire_interlocked_force(mgr); + + for (size_t i = 0; i < mgr->nworkers; i++) { + isc__netievent_t *event = NULL; + + LOCK(&mgr->workers[i].lock); + mgr->workers[i].paused = 
true; + UNLOCK(&mgr->workers[i].lock); + + /* + * We have to issue a stop, otherwise the uv_run loop will + * run indefinitely! + */ + event = isc__nm_get_ievent(mgr, netievent_stop); + isc__nm_enqueue_ievent(&mgr->workers[i], event); + } + + LOCK(&mgr->lock); + while (atomic_load_relaxed(&mgr->workers_paused) != + atomic_load_relaxed(&mgr->workers_running)) + { + WAIT(&mgr->wkstatecond, &mgr->lock); + } + UNLOCK(&mgr->lock); +} + +void +isc_nm_resume(isc_nm_t *mgr) { + REQUIRE(VALID_NM(mgr)); + REQUIRE(!isc__nm_in_netthread()); + + for (size_t i = 0; i < mgr->nworkers; i++) { + LOCK(&mgr->workers[i].lock); + mgr->workers[i].paused = false; + SIGNAL(&mgr->workers[i].cond); + UNLOCK(&mgr->workers[i].lock); + } + isc__nm_drop_interlocked(mgr); + + /* + * We're not waiting for all the workers to come back to life; + * they eventually will, we don't care. + */ +} + +void +isc_nm_attach(isc_nm_t *mgr, isc_nm_t **dst) { + int refs; + + REQUIRE(VALID_NM(mgr)); + REQUIRE(dst != NULL && *dst == NULL); + + refs = isc_refcount_increment(&mgr->references); + INSIST(refs > 0); + + *dst = mgr; +} + +void +isc_nm_detach(isc_nm_t **mgr0) { + isc_nm_t *mgr = NULL; + int references; + + REQUIRE(mgr0 != NULL); + REQUIRE(VALID_NM(*mgr0)); + + mgr = *mgr0; + *mgr0 = NULL; + + references = isc_refcount_decrement(&mgr->references); + INSIST(references > 0); + if (references == 1) { + nm_destroy(&mgr); + } +} + + +void +isc_nm_destroy(isc_nm_t **mgr0) { + isc_nm_t *mgr = NULL; + int references; + + REQUIRE(mgr0 != NULL); + REQUIRE(VALID_NM(*mgr0)); + + mgr = *mgr0; + *mgr0 = NULL; + + /* + * Wait for the manager to be dereferenced elsewhere.
+ */ + while (isc_refcount_current(&mgr->references) > 1) { +#ifdef WIN32 + _sleep(1000); +#else + usleep(1000000); +#endif + } + references = isc_refcount_decrement(&mgr->references); + INSIST(references > 0); + if (references == 1) { + nm_destroy(&mgr); + } +} + +void +isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp) { + REQUIRE(VALID_NM(mgr)); + + atomic_store(&mgr->maxudp, maxudp); +} + +/* + * nm_thread is a single worker thread, that runs uv_run event loop + * until asked to stop. + */ +static void * +nm_thread(void *worker0) { + isc__networker_t *worker = (isc__networker_t *) worker0; + + isc__nm_tid_v = worker->id; + isc_thread_setaffinity(isc__nm_tid_v); + + while (true) { + int r = uv_run(&worker->loop, UV_RUN_DEFAULT); + bool pausing = false; + + /* + * or there's nothing to do. In the first case - wait + * for condition. In the latter - timedwait + */ + LOCK(&worker->lock); + while (worker->paused) { + LOCK(&worker->mgr->lock); + if (!pausing) { + atomic_fetch_add_explicit( + &worker->mgr->workers_paused, + 1, memory_order_acquire); + pausing = true; + } + + SIGNAL(&worker->mgr->wkstatecond); + UNLOCK(&worker->mgr->lock); + + WAIT(&worker->cond, &worker->lock); + } + if (pausing) { + uint32_t wp = atomic_fetch_sub_explicit( + &worker->mgr->workers_paused, + 1, memory_order_release); + if (wp == 1) { + atomic_store(&worker->mgr->paused, false); + } + } + UNLOCK(&worker->lock); + + if (worker->finished) { + /* TODO walk the handles and free them! */ + break; + } + + if (r == 0) { + /* + * TODO it should never happen - we don't have + * any sockets we're listening on? + */ +#ifdef WIN32 + _sleep(100); +#else + usleep(100000); +#endif + } + + /* + * Empty the async queue. 
+ */ + async_cb(&worker->async); + } + + LOCK(&worker->mgr->lock); + atomic_fetch_sub_explicit(&worker->mgr->workers_running, 1, + memory_order_relaxed); + SIGNAL(&worker->mgr->wkstatecond); + UNLOCK(&worker->mgr->lock); + return (NULL); +} + +/* + * async_cb is a universal callback for 'async' events sent to event loop. + * It's the only way to safely pass data to libuv event loop. We use a single + * async event and a lockless queue of 'isc__netievent_t' structures passed + * from other threads. + */ +static void +async_cb(uv_async_t *handle) { + isc__networker_t *worker = (isc__networker_t *) handle->loop->data; + isc__netievent_t *ievent; + + /* + * We only try dequeue to not waste time, libuv guarantees + * that if someone calls uv_async_send -after- async_cb was called + * then async_cb will be called again, we won't lose any signals. + */ + while ((ievent = (isc__netievent_t *) + isc_queue_dequeue(worker->ievents)) != NULL) + { + switch (ievent->type) { + case netievent_stop: + uv_stop(handle->loop); + isc_mem_put(worker->mgr->mctx, ievent, + sizeof(isc__netievent_storage_t)); + return; + case netievent_udplisten: + isc__nm_async_udplisten(worker, ievent); + break; + case netievent_udpstoplisten: + isc__nm_async_udpstoplisten(worker, ievent); + break; + case netievent_udpsend: + isc__nm_async_udpsend(worker, ievent); + break; + case netievent_tcpconnect: + isc__nm_async_tcpconnect(worker, ievent); + break; + case netievent_tcplisten: + isc__nm_async_tcplisten(worker, ievent); + break; + case netievent_tcpstartread: + isc__nm_async_startread(worker, ievent); + break; + case netievent_tcppauseread: + isc__nm_async_pauseread(worker, ievent); + break; + case netievent_tcpsend: + isc__nm_async_tcpsend(worker, ievent); + break; + case netievent_tcpstoplisten: + isc__nm_async_tcpstoplisten(worker, ievent); + break; + case netievent_tcpclose: + isc__nm_async_tcpclose(worker, ievent); + break; + default: + INSIST(0); + ISC_UNREACHABLE(); + } + 
isc_mem_put(worker->mgr->mctx, ievent, + sizeof(isc__netievent_storage_t)); + } +} + +void * +isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type) { + isc__netievent_storage_t *event = + isc_mem_get(mgr->mctx, sizeof(isc__netievent_storage_t)); + + /* XXX: use a memory pool? */ + *event = (isc__netievent_storage_t) { + .ni.type = type + }; + return (event); +} + +void +isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) { + isc_queue_enqueue(worker->ievents, (uintptr_t)event); + uv_async_send(&worker->async); +} + +static bool +isc__nmsocket_active(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + if (sock->parent != NULL) { + return (atomic_load(&sock->parent->active)); + } + + return (atomic_load(&sock->active)); +} + +void +isc_nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(target != NULL && *target == NULL); + + if (sock->parent != NULL) { + INSIST(sock->parent->parent == NULL); /* sanity check */ + isc_refcount_increment(&sock->parent->references); + } else { + isc_refcount_increment(&sock->references); + } + + *target = sock; +} + +/* + * Free all resources inside a socket (including its children if any). + */ +static void +nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { + isc_nmhandle_t *handle = NULL; + isc__nm_uvreq_t *uvreq = NULL; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(!isc__nmsocket_active(sock)); + + atomic_store(&sock->destroying, true); + + if (sock->parent == NULL && sock->children != NULL) { + /* + * We shouldn't be here unless there are no active handles, + * so we can clean up and free the children. + */ + for (int i = 0; i < sock->nchildren; i++) { + if (!atomic_load(&sock->children[i].destroying)) { + nmsocket_cleanup(&sock->children[i], false); + } + } + + /* + * This was a parent socket; free the children. 
+ */ + isc_mem_put(sock->mgr->mctx, sock->children, + sock->nchildren * sizeof(*sock)); + sock->children = NULL; + sock->nchildren = 0; + } + + if (sock->tcphandle != NULL) { + isc_nmhandle_unref(sock->tcphandle); + sock->tcphandle = NULL; + } + + while ((handle = isc_astack_pop(sock->inactivehandles)) != NULL) { + nmhandle_free(sock, handle); + } + + if (sock->buf != NULL) { + isc_mem_put(sock->mgr->mctx, sock->buf, sock->buf_size); + } + + if (sock->quota != NULL) { + isc_quota_detach(&sock->quota); + } + + isc_astack_destroy(sock->inactivehandles); + + while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) { + isc_mem_put(sock->mgr->mctx, uvreq, sizeof(*uvreq)); + } + + isc_astack_destroy(sock->inactivereqs); + + isc_mem_free(sock->mgr->mctx, sock->ah_frees); + isc_mem_free(sock->mgr->mctx, sock->ah_handles); + + if (dofree) { + isc_nm_t *mgr = sock->mgr; + isc_mem_put(mgr->mctx, sock, sizeof(*sock)); + isc_nm_detach(&mgr); + } else { + isc_nm_detach(&sock->mgr); + } + +} + +static void +nmsocket_maybe_destroy(isc_nmsocket_t *sock) { + int active_handles = 0; + bool destroy = false; + + REQUIRE(!isc__nmsocket_active(sock)); + + if (sock->parent != NULL) { + /* + * This is a child socket and cannot be destroyed except + * as a side effect of destroying the parent, so let's go + * see if the parent is ready to be destroyed. + */ + nmsocket_maybe_destroy(sock->parent); + return; + } + + /* + * This is a parent socket (or a standalone). See whether the + * children have active handles before deciding whether to + * accept destruction. 
+ */ + LOCK(&sock->lock); + active_handles += sock->ah_cpos; + if (sock->children != NULL) { + for (int i = 0; i < sock->nchildren; i++) { + LOCK(&sock->children[i].lock); + active_handles += sock->children[i].ah_cpos; + UNLOCK(&sock->children[i].lock); + } + } + + if (atomic_load(&sock->closed) && + atomic_load(&sock->references) == 0 && + (active_handles == 0 || sock->tcphandle != NULL)) + { + destroy = true; + } + UNLOCK(&sock->lock); + + if (destroy) { + nmsocket_cleanup(sock, true); + } +} + +void +isc__nmsocket_prep_destroy(isc_nmsocket_t *sock) { + REQUIRE(sock->parent == NULL); + + /* + * The final external reference to the socket is gone. We can try + * destroying the socket, but we have to wait for all the inflight + * handles to finish first. + */ + atomic_store(&sock->active, false); + + /* + * If the socket has children, they'll need to be marked inactive + * so they can be cleaned up too. + */ + if (sock->children != NULL) { + for (int i = 0; i < sock->nchildren; i++) { + atomic_store(&sock->children[i].active, false); + } + } + + /* + * If we're here then we already stopped listening; otherwise + * we'd have a hanging reference from the listening process. + * + * If it's a regular socket we may need to close it. + */ + if (!atomic_load(&sock->closed)) { + switch (sock->type) { + case isc_nm_tcpsocket: + isc__nm_tcp_close(sock); + break; + case isc_nm_tcpdnssocket: + isc__nm_tcpdns_close(sock); + break; + default: + break; + } + } + + nmsocket_maybe_destroy(sock); +} + +void +isc_nmsocket_detach(isc_nmsocket_t **sockp) { + REQUIRE(sockp != NULL && *sockp != NULL); + REQUIRE(VALID_NMSOCK(*sockp)); + + isc_nmsocket_t *sock = *sockp, *rsock = NULL; + int references; + *sockp = NULL; + + /* + * If the socket is a part of a set (a child socket) we are + * counting references for the whole set at the parent. 
+ */ + if (sock->parent != NULL) { + rsock = sock->parent; + INSIST(rsock->parent == NULL); /* Sanity check */ + } else { + rsock = sock; + } + + references = isc_refcount_decrement(&rsock->references); + INSIST(references > 0); + if (references == 1) { + isc__nmsocket_prep_destroy(rsock); + } + +} + +void +isc__nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, + isc_nmsocket_type type) +{ + *sock = (isc_nmsocket_t) { + .type = type, + .fd = -1, + .ah_size = 32, + .inactivehandles = isc_astack_new(mgr->mctx, 60), + .inactivereqs = isc_astack_new(mgr->mctx, 60) + }; + + isc_nm_attach(mgr, &sock->mgr); + sock->uv_handle.handle.data = sock; + + sock->ah_frees = isc_mem_allocate(mgr->mctx, + sock->ah_size * sizeof(size_t)); + sock->ah_handles = isc_mem_allocate(mgr->mctx, + sock->ah_size * + sizeof(isc_nmhandle_t *)); + for (size_t i = 0; i < 32; i++) { + sock->ah_frees[i] = i; + sock->ah_handles[i] = NULL; + } + + isc_mutex_init(&sock->lock); + isc_condition_init(&sock->cond); + isc_refcount_init(&sock->references, 1); + atomic_init(&sock->active, true); + + sock->magic = NMSOCK_MAGIC; +} + +void +isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) { + isc_nmsocket_t *sock = (isc_nmsocket_t *) handle->data; + isc__networker_t *worker = NULL; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(isc__nm_in_netthread()); + REQUIRE(size <= 65536); + + /* TODO that's for UDP only! 
*/ + worker = &sock->mgr->workers[sock->tid]; + INSIST(!worker->udprecvbuf_inuse); + + buf->base = worker->udprecvbuf; + worker->udprecvbuf_inuse = true; + buf->len = size; +} + +void +isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) { + isc__networker_t *worker = NULL; + + REQUIRE(VALID_NMSOCK(sock)); + + worker = &sock->mgr->workers[sock->tid]; + + REQUIRE(worker->udprecvbuf_inuse); + REQUIRE(buf->base == worker->udprecvbuf); + + UNUSED(buf); + + worker->udprecvbuf_inuse = false; +} + +static isc_nmhandle_t * +alloc_handle(isc_nmsocket_t *sock) { + isc_nmhandle_t *handle = + isc_mem_get(sock->mgr->mctx, + sizeof(isc_nmhandle_t) + sock->extrahandlesize); + + *handle = (isc_nmhandle_t) { + .magic = NMHANDLE_MAGIC + }; + isc_refcount_init(&handle->references, 1); + + return (handle); +} + +isc_nmhandle_t * +isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, + isc_sockaddr_t *local) +{ + isc_nmhandle_t *handle = NULL; + int pos; + + REQUIRE(VALID_NMSOCK(sock)); + + handle = isc_astack_pop(sock->inactivehandles); + + if (handle == NULL) { + handle = alloc_handle(sock); + } else { + INSIST(VALID_NMHANDLE(handle)); + isc_refcount_increment(&handle->references); + } + + handle->sock = sock; + if (peer != NULL) { + memcpy(&handle->peer, peer, sizeof(isc_sockaddr_t)); + } else { + memcpy(&handle->peer, &sock->peer, sizeof(isc_sockaddr_t)); + } + + if (local != NULL) { + memcpy(&handle->local, local, sizeof(isc_sockaddr_t)); + } else if (sock->iface != NULL) { + memcpy(&handle->local, &sock->iface->addr, + sizeof(isc_sockaddr_t)); + } else { + INSIST(0); + ISC_UNREACHABLE(); + } + + LOCK(&sock->lock); + /* We need to add this handle to the list of active handles */ + if (sock->ah_cpos == sock->ah_size) { + sock->ah_frees = + isc_mem_reallocate(sock->mgr->mctx, sock->ah_frees, + sock->ah_size * 2 * + sizeof(size_t)); + sock->ah_handles = + isc_mem_reallocate(sock->mgr->mctx, + sock->ah_handles, + sock->ah_size * 2 * + sizeof(isc_nmhandle_t *)); + + 
for (size_t i = sock->ah_size; i < sock->ah_size * 2; i++) { + sock->ah_frees[i] = i; + sock->ah_handles[i] = NULL; + } + + sock->ah_size *= 2; + } + + pos = sock->ah_frees[sock->ah_cpos++]; + INSIST(sock->ah_handles[pos] == NULL); + sock->ah_handles[pos] = handle; + handle->ah_pos = pos; + UNLOCK(&sock->lock); + + if (sock->type == isc_nm_tcpsocket) { + INSIST(sock->tcphandle == NULL); + sock->tcphandle = handle; + } + + return (handle); +} + +void +isc_nmhandle_ref(isc_nmhandle_t *handle) { + int refs; + + REQUIRE(VALID_NMHANDLE(handle)); + + refs = isc_refcount_increment(&handle->references); + INSIST(refs > 0); + +} + +bool +isc_nmhandle_is_stream(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + + return (handle->sock->type == isc_nm_tcpsocket || + handle->sock->type == isc_nm_tcpdnssocket); +} + +static void +nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle) { + size_t extra = sock->extrahandlesize; + + if (handle->dofree) { + handle->dofree(handle->opaque); + } + + *handle = (isc_nmhandle_t) { + .magic = 0 + }; + isc_mem_put(sock->mgr->mctx, handle, sizeof(isc_nmhandle_t) + extra); +} + +void +isc_nmhandle_unref(isc_nmhandle_t *handle) { + int refs; + + REQUIRE(VALID_NMHANDLE(handle)); + + refs = isc_refcount_decrement(&handle->references); + INSIST(refs > 0); + if (refs == 1) { + isc_nmsocket_t *sock = handle->sock; + bool reuse = false; + + handle->sock = NULL; + if (handle->doreset != NULL) { + handle->doreset(handle->opaque); + } + + /* + * We do it all under lock to avoid races with socket + * destruction. 
+ */ + LOCK(&sock->lock); + INSIST(sock->ah_handles[handle->ah_pos] == handle); + INSIST(sock->ah_size > handle->ah_pos); + INSIST(sock->ah_cpos > 0); + sock->ah_handles[handle->ah_pos] = NULL; + sock->ah_frees[--sock->ah_cpos] = handle->ah_pos; + handle->ah_pos = 0; + + if (atomic_load(&sock->active)) { + reuse = isc_astack_trypush(sock->inactivehandles, + handle); + } + UNLOCK(&sock->lock); + + if (!reuse) { + nmhandle_free(sock, handle); + } + + if (sock->ah_cpos == 0 && + !atomic_load(&sock->active) && + !atomic_load(&sock->destroying)) + { + nmsocket_maybe_destroy(sock); + } + } +} + +void * +isc_nmhandle_getdata(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + + return (handle->opaque); +} + +void +isc_nmhandle_setdata(isc_nmhandle_t *handle, void *arg, + isc_nm_opaquecb doreset, isc_nm_opaquecb dofree) +{ + REQUIRE(VALID_NMHANDLE(handle)); + + handle->opaque = arg; + handle->doreset = doreset; + handle->dofree = dofree; +} + +void * +isc_nmhandle_getextra(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + + return (handle->extra); +} + +isc_sockaddr_t +isc_nmhandle_peeraddr(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + + return (handle->peer); +} + +isc_sockaddr_t +isc_nmhandle_localaddr(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + + return (handle->local); +} + +isc__nm_uvreq_t * +isc__nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock) { + isc__nm_uvreq_t *req = NULL; + + REQUIRE(VALID_NM(mgr)); + REQUIRE(VALID_NMSOCK(sock)); + + if (sock != NULL && atomic_load(&sock->active)) { + /* Try to reuse one */ + req = isc_astack_pop(sock->inactivereqs); + } + + if (req == NULL) { + req = isc_mem_get(mgr->mctx, sizeof(isc__nm_uvreq_t)); + } + + *req = (isc__nm_uvreq_t) { + .magic = 0 + }; + req->uv_req.req.data = req; + isc_nmsocket_attach(sock, &req->sock); + req->magic = UVREQ_MAGIC; + + return (req); +} + +void +isc__nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock) { + isc__nm_uvreq_t *req = 
NULL; + isc_nmhandle_t *handle = NULL; + + REQUIRE(req0 != NULL); + REQUIRE(VALID_UVREQ(*req0)); + + req = *req0; + *req0 = NULL; + + INSIST(sock == req->sock); + + req->magic = 0; + + /* + * We need to save this first to make sure that handle, + * sock, and the netmgr won't all disappear. + */ + handle = req->handle; + req->handle = NULL; + + if (!atomic_load(&sock->active) || + !isc_astack_trypush(sock->inactivereqs, req)) + { + isc_mem_put(sock->mgr->mctx, req, sizeof(isc__nm_uvreq_t)); + } + + if (handle != NULL) { + isc_nmhandle_unref(handle); + } + + isc_nmsocket_detach(&sock); +} + +isc_result_t +isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg) +{ + REQUIRE(VALID_NMHANDLE(handle)); + + switch (handle->sock->type) { + case isc_nm_udpsocket: + case isc_nm_udplistener: + return (isc__nm_udp_send(handle, region, cb, cbarg)); + case isc_nm_tcpsocket: + return (isc__nm_tcp_send(handle, region, cb, cbarg)); + case isc_nm_tcpdnssocket: + return (isc__nm_tcpdns_send(handle, region, cb, cbarg)); + default: + INSIST(0); + ISC_UNREACHABLE(); + } +} + +bool +isc__nm_acquire_interlocked(isc_nm_t *mgr) { + LOCK(&mgr->lock); + bool success = atomic_compare_exchange_strong(&mgr->interlocked, + &(bool){false}, true); + UNLOCK(&mgr->lock); + return (success); +} + +void +isc__nm_drop_interlocked(isc_nm_t *mgr) { + LOCK(&mgr->lock); + bool success = atomic_compare_exchange_strong(&mgr->interlocked, + &(bool){true}, false); + INSIST(success == true); + BROADCAST(&mgr->wkstatecond); + UNLOCK(&mgr->lock); +} + +void +isc__nm_acquire_interlocked_force(isc_nm_t *mgr) { + LOCK(&mgr->lock); + while (!atomic_compare_exchange_strong(&mgr->interlocked, + &(bool){false}, true)) + { + WAIT(&mgr->wkstatecond, &mgr->lock); + } + UNLOCK(&mgr->lock); +} diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c new file mode 100644 index 0000000000..f0aabd28f8 --- /dev/null +++ b/lib/isc/netmgr/tcp.c @@ -0,0 +1,611 @@ +/* + * Copyright (C) Internet Systems 
Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netmgr-int.h" + +static int +tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req); + +static void +tcp_close_direct(isc_nmsocket_t *sock); + +static isc_result_t +tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req); +static void +tcp_connect_cb(uv_connect_t *uvreq, int status); + +static void +tcp_connection_cb(uv_stream_t *server, int status); + +static void +read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf); + +static void +tcp_close_cb(uv_handle_t *uvhandle); + +static int +tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { + isc__networker_t *worker; + int r; + + REQUIRE(isc__nm_in_netthread()); + + worker = &sock->mgr->workers[isc_nm_tid()]; + + r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp); + if (r != 0) { + return (r); + } + + if (req->local.length != 0) { + r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0); + if (r != 0) { + tcp_close_direct(sock); + return (r); + } + } + + r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, + &req->peer.type.sa, tcp_connect_cb); + return (r); +} + +void +isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ievent0) { + isc__netievent_tcpconnect_t *ievent = + (isc__netievent_tcpconnect_t *) ievent0; + isc_nmsocket_t *sock = ievent->sock; + isc__nm_uvreq_t *req = ievent->req; + int r; + + REQUIRE(sock->type == isc_nm_tcpsocket); + REQUIRE(worker->id == ievent->req->sock->mgr->workers[isc_nm_tid()].id); + + r 
= tcp_connect_direct(sock, req); + if (r != 0) { + /* We need to issue callbacks ourselves */ + tcp_connect_cb(&req->uv_req.connect, r); + } +} + +static void +tcp_connect_cb(uv_connect_t *uvreq, int status) { + isc__nm_uvreq_t *req = (isc__nm_uvreq_t *) uvreq->data; + isc_nmsocket_t *sock = uvreq->handle->data; + + REQUIRE(VALID_UVREQ(req)); + + if (status == 0) { + isc_result_t result; + isc_nmhandle_t *handle = NULL; + struct sockaddr_storage ss; + + uv_tcp_getpeername(&sock->uv_handle.tcp, + (struct sockaddr *) &ss, + &(int){sizeof(ss)}); + result = isc_sockaddr_fromsockaddr(&sock->peer, + (struct sockaddr *) &ss); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + + handle = isc__nmhandle_get(sock, NULL, NULL); + req->cb.connect(handle, ISC_R_SUCCESS, req->cbarg); + } else { + /* TODO handle it properly, free sock, translate code */ + req->cb.connect(NULL, ISC_R_FAILURE, req->cbarg); + } + + isc__nm_uvreq_put(&req, sock); +} + +isc_result_t +isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, + isc_nm_cb_t cb, void *cbarg, + size_t extrahandlesize, isc_quota_t *quota, + isc_nmsocket_t **rv) +{ + isc__netievent_tcplisten_t *ievent = NULL; + isc_nmsocket_t *nsock = NULL; + + REQUIRE(VALID_NM(mgr)); + + nsock = isc_mem_get(mgr->mctx, sizeof(*nsock)); + isc__nmsocket_init(nsock, mgr, isc_nm_tcplistener); + nsock->iface = iface; + nsock->rcb.accept = cb; + nsock->rcbarg = cbarg; + nsock->extrahandlesize = extrahandlesize; + if (quota != NULL) { + /* + * We need to force it to make sure we get it attached. + * An example failure mode would be server under attack + * reconfiguring interfaces - that might cause weak attach + * to fail and leave this listening socket without limits. + * We can ignore the result. + */ + isc_quota_force(quota, &nsock->quota); + } + nsock->tid = isc_random_uniform(mgr->nworkers); + + /* + * Listening to TCP is rare enough not to care about the + * added overhead from passing this to another thread. 
+ */ + ievent = isc__nm_get_ievent(mgr, netievent_tcplisten); + ievent->sock = nsock; + isc__nm_enqueue_ievent(&mgr->workers[nsock->tid], + (isc__netievent_t *) ievent); + *rv = nsock; + + return (ISC_R_SUCCESS); +} + +void +isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ievent0) { + isc__netievent_tcplisten_t *ievent = + (isc__netievent_tcplisten_t *) ievent0; + isc_nmsocket_t *sock = ievent->sock; + int r; + + REQUIRE(isc__nm_in_netthread()); + REQUIRE(sock->type == isc_nm_tcplistener); + + r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp); + if (r != 0) { + return; + } + + uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa, 0); + r = uv_listen((uv_stream_t *) &sock->uv_handle.tcp, 10, + tcp_connection_cb); + if (r != 0) { + return; + } + + atomic_store(&sock->listening, true); + + return; +} + +void +isc_nm_tcp_stoplistening(isc_nmsocket_t *sock) { + isc__netievent_tcpstoplisten_t *ievent = NULL; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(!isc__nm_in_netthread()); + + ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpstoplisten); + isc_nmsocket_attach(sock, &ievent->sock); + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); +} + +static void +stoplistening_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = handle->data; + + LOCK(&sock->lock); + atomic_store(&sock->listening, false); + atomic_store(&sock->closed, true); + SIGNAL(&sock->cond); + UNLOCK(&sock->lock); + + if (sock->quota != NULL) { + isc_quota_detach(&sock->quota); + } + + isc_nmsocket_detach(&sock); +} + +void +isc__nm_async_tcpstoplisten(isc__networker_t *worker, + isc__netievent_t *ievent0) +{ + isc__netievent_tcpstoplisten_t *ievent = + (isc__netievent_tcpstoplisten_t *) ievent0; + isc_nmsocket_t *sock = ievent->sock; + + UNUSED(worker); + + REQUIRE(isc__nm_in_netthread()); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tcplistener); + + uv_close(&sock->uv_handle.handle, stoplistening_cb); +} + +isc_result_t 
+isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { + isc_nmsocket_t *sock = NULL; + + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + + sock = handle->sock; + sock->rcb.recv = cb; + sock->rcbarg = cbarg; /* That's obviously broken... */ + if (sock->tid == isc_nm_tid()) { + int r = uv_read_start(&sock->uv_handle.stream, + isc__nm_alloc_cb, read_cb); + INSIST(r == 0); + } else { + isc__netievent_startread_t *ievent = + isc__nm_get_ievent(sock->mgr, + netievent_tcpstartread); + ievent->sock = sock; + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); + } + + return (ISC_R_SUCCESS); +} + +void +isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ievent0) { + isc__netievent_startread_t *ievent = + (isc__netievent_startread_t *) ievent0; + isc_nmsocket_t *sock = ievent->sock; + + REQUIRE(worker->id == isc_nm_tid()); + + uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb, read_cb); +} + +isc_result_t +isc_nm_pauseread(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + + if (sock->tid == isc_nm_tid()) { + int r = uv_read_stop(&sock->uv_handle.stream); + INSIST(r == 0); + } else { + isc__netievent_pauseread_t *ievent = + isc__nm_get_ievent(sock->mgr, + netievent_tcppauseread); + ievent->sock = sock; + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); + } + + return (ISC_R_SUCCESS); +} + +void +isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ievent0) { + isc__netievent_pauseread_t *ievent = + (isc__netievent_pauseread_t *) ievent0; + isc_nmsocket_t *sock = ievent->sock; + REQUIRE(VALID_NMSOCK(sock)); + + REQUIRE(worker->id == isc_nm_tid()); + + uv_read_stop(&sock->uv_handle.stream); +} + +isc_result_t +isc_nm_resumeread(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->rcb.recv != NULL); + + if (sock->tid == isc_nm_tid()) { + int r = uv_read_start(&sock->uv_handle.stream, + 
isc__nm_alloc_cb, read_cb); + INSIST(r == 0); + } else { + /* It's the same as startread */ + isc__netievent_startread_t *ievent = + isc__nm_get_ievent(sock->mgr, + netievent_tcpstartread); + ievent->sock = sock; + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); + } + + return (ISC_R_SUCCESS); +} + +static void +read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { + isc_nmsocket_t *sock = stream->data; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(buf != NULL); + + if (nread >= 0) { + isc_region_t region = { + .base = (unsigned char *) buf->base, + .length = nread + }; + + INSIST(sock->rcb.recv != NULL); + sock->rcb.recv(sock->tcphandle, &region, sock->rcbarg); + isc__nm_free_uvbuf(sock, buf); + return; + } + + isc__nm_free_uvbuf(sock, buf); + if (sock->quota) { + isc_quota_detach(&sock->quota); + } + sock->rcb.recv(sock->tcphandle, NULL, sock->rcbarg); + + /* + * XXXWPK TODO clean up handles, close the connection, + * reclaim quota + */ +} + +static isc_result_t +accept_connection(isc_nmsocket_t *ssock) { + isc_result_t result; + isc_quota_t *quota = NULL; + isc_nmsocket_t *csock = NULL; + isc__networker_t *worker = NULL; + isc_nmhandle_t *handle = NULL; + struct sockaddr_storage ss; + isc_sockaddr_t local; + int r; + + REQUIRE(VALID_NMSOCK(ssock)); + REQUIRE(ssock->tid == isc_nm_tid()); + + if (!atomic_load_relaxed(&ssock->active)) { + /* We're closing, bail */ + return (ISC_R_CANCELED); + } + + if (ssock->quota != NULL) { + result = isc_quota_attach(ssock->quota, &quota); + if (result != ISC_R_SUCCESS) { + return (result); + } + } + + csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t)); + isc__nmsocket_init(csock, ssock->mgr, isc_nm_tcpsocket); + csock->tid = isc_nm_tid(); + csock->extrahandlesize = ssock->extrahandlesize; + csock->iface = ssock->iface; + csock->quota = quota; + quota = NULL; + + worker = &ssock->mgr->workers[isc_nm_tid()]; + uv_tcp_init(&worker->loop, &csock->uv_handle.tcp); + + r = 
uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream); + if (r != 0) { + if (csock->quota != NULL) { + isc_quota_detach(&csock->quota); + } + isc_mem_put(ssock->mgr->mctx, csock, sizeof(isc_nmsocket_t)); + + return (isc__nm_uverr2result(r)); + } + + isc_nmsocket_attach(ssock, &csock->server); + + uv_tcp_getpeername(&csock->uv_handle.tcp, (struct sockaddr *) &ss, + &(int){sizeof(ss)}); + + result = isc_sockaddr_fromsockaddr(&csock->peer, + (struct sockaddr *) &ss); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + uv_tcp_getsockname(&csock->uv_handle.tcp, (struct sockaddr *) &ss, + &(int){sizeof(ss)}); + result = isc_sockaddr_fromsockaddr(&local, + (struct sockaddr *) &ss); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + + handle = isc__nmhandle_get(csock, NULL, &local); + + INSIST(ssock->rcb.accept != NULL); + ssock->rcb.accept(handle, ISC_R_SUCCESS, ssock->rcbarg); + isc_nmsocket_detach(&csock); + + return (ISC_R_SUCCESS); +} + +static void +tcp_connection_cb(uv_stream_t *server, int status) { + isc_nmsocket_t *ssock = server->data; + isc_result_t result = accept_connection(ssock); + + UNUSED(status); + + if (result != ISC_R_SUCCESS) { + if (result == ISC_R_QUOTA || result == ISC_R_SOFTQUOTA) { + ssock->overquota = true; + } + /* XXXWPK TODO LOG */ + } +} + +/* + * isc__nm_tcp_send sends buf to a peer on a socket. 
+ */ +isc_result_t +isc__nm_tcp_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg) +{ + isc_nmsocket_t *sock = handle->sock; + isc__netievent_tcpsend_t *ievent = NULL; + isc__nm_uvreq_t *uvreq = NULL; + + REQUIRE(sock->type == isc_nm_tcpsocket); + + uvreq = isc__nm_uvreq_get(sock->mgr, sock); + uvreq->uvbuf.base = (char *) region->base; + uvreq->uvbuf.len = region->length; + uvreq->handle = handle; + isc_nmhandle_ref(uvreq->handle); + uvreq->cb.send = cb; + uvreq->cbarg = cbarg; + + if (sock->tid == isc_nm_tid()) { + /* + * If we're in the same thread as the socket we can send the + * data directly + */ + return (tcp_send_direct(sock, uvreq)); + } else { + /* + * We need to create an event and pass it using async channel + */ + ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpsend); + ievent->sock = sock; + ievent->req = uvreq; + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); + return (ISC_R_SUCCESS); + } + + return (ISC_R_UNEXPECTED); +} + +static void +tcp_send_cb(uv_write_t *req, int status) { + isc_result_t result = ISC_R_SUCCESS; + isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *) req->data; + + REQUIRE(VALID_UVREQ(uvreq)); + REQUIRE(VALID_NMHANDLE(uvreq->handle)); + + if (status < 0) { + result = isc__nm_uverr2result(status); + } + + uvreq->cb.send(uvreq->handle, result, uvreq->cbarg); + isc_nmhandle_unref(uvreq->handle); + isc__nm_uvreq_put(&uvreq, uvreq->handle->sock); +} + +/* + * Handle 'tcpsend' async event - send a packet on the socket + */ +void +isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ievent0) { + isc_result_t result; + isc__netievent_tcpsend_t *ievent = (isc__netievent_tcpsend_t *) ievent0; + + REQUIRE(worker->id == ievent->sock->tid); + + if (!atomic_load(&ievent->sock->active)) { + return; + } + + result = tcp_send_direct(ievent->sock, ievent->req); + if (result != ISC_R_SUCCESS) { + ievent->req->cb.send(ievent->req->handle, + result, ievent->req->cbarg); 
+ isc__nm_uvreq_put(&ievent->req, ievent->req->handle->sock); + } +} + +static isc_result_t +tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { + int r; + + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->type == isc_nm_tcpsocket); + + isc_nmhandle_ref(req->handle); + r = uv_write(&req->uv_req.write, &sock->uv_handle.stream, + &req->uvbuf, 1, tcp_send_cb); + if (r < 0) { + req->cb.send(NULL, isc__nm_uverr2result(r), req->cbarg); + isc__nm_uvreq_put(&req, sock); + return (isc__nm_uverr2result(r)); + } + + return (ISC_R_SUCCESS); +} + +static void +tcp_close_cb(uv_handle_t *uvhandle) { + isc_nmsocket_t *sock = uvhandle->data; + + REQUIRE(VALID_NMSOCK(sock)); + + atomic_store(&sock->closed, true); + isc__nmsocket_prep_destroy(sock); +} + +static void +tcp_close_direct(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->type == isc_nm_tcpsocket); + + if (sock->quota != NULL) { + isc_nmsocket_t *ssock = sock->server; + + isc_quota_detach(&sock->quota); + + if (ssock->overquota) { + /* XXXWPK TODO we should loop here */ + isc_result_t result = accept_connection(ssock); + if (result != ISC_R_QUOTA && result != ISC_R_SOFTQUOTA) + { + ssock->overquota = false; + } + } + } + + isc_nmsocket_detach(&sock->server); + uv_close(&sock->uv_handle.handle, tcp_close_cb); +} + +void +isc__nm_tcp_close(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tcpsocket); + + if (sock->tid == isc_nm_tid()) { + tcp_close_direct(sock); + } else { + /* + * We need to create an event and pass it using async channel + */ + isc__netievent_tcpclose_t *ievent = + isc__nm_get_ievent(sock->mgr, netievent_tcpclose); + + ievent->sock = sock; + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); + } +} + +void +isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ievent0) { + isc__netievent_tcpclose_t *ievent = + (isc__netievent_tcpclose_t *) ievent0; + 
+ REQUIRE(worker->id == ievent->sock->tid); + + tcp_close_direct(ievent->sock); +} diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c new file mode 100644 index 0000000000..0c38ecf494 --- /dev/null +++ b/lib/isc/netmgr/tcpdns.c @@ -0,0 +1,405 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netmgr-int.h" + +static void +dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg); + +static inline size_t +dnslen(unsigned char* base) { + return ((base[0] << 8) + (base[1])); +} + +#define NM_REG_BUF 4096 +#define NM_BIG_BUF 65536 +static inline void +alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) { + REQUIRE(len <= NM_BIG_BUF); + + if (sock->buf == NULL) { + /* We don't have the buffer at all */ + size_t alloc_len = len < NM_REG_BUF ? 
NM_REG_BUF : NM_BIG_BUF; + sock->buf = isc_mem_get(sock->mgr->mctx, alloc_len); + sock->buf_size = alloc_len; + } else { + /* We have the buffer but it's too small */ + sock->buf = isc_mem_reallocate(sock->mgr->mctx, sock->buf, + NM_BIG_BUF); + sock->buf_size = NM_BIG_BUF; + } +} + + +/* + * Accept callback for TCP-DNS connection + */ +static void +dnslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + isc_nmsocket_t *dnslistensock = (isc_nmsocket_t *) cbarg; + isc_nmsocket_t *dnssock = NULL; + + REQUIRE(VALID_NMSOCK(dnslistensock)); + REQUIRE(dnslistensock->type == isc_nm_tcpdnslistener); + + /* If accept() was unsuccessful we can't do anything */ + if (result != ISC_R_SUCCESS) { + return; + } + + /* We need to create a 'wrapper' dnssocket for this connection */ + dnssock = isc_mem_get(handle->sock->mgr->mctx, sizeof(*dnssock)); + isc__nmsocket_init(dnssock, handle->sock->mgr, + isc_nm_tcpdnssocket); + + /* We need to copy read callbacks from outer socket */ + dnssock->rcb.recv = dnslistensock->rcb.recv; + dnssock->rcbarg = dnslistensock->rcbarg; + dnssock->extrahandlesize = dnslistensock->extrahandlesize; + isc_nmsocket_attach(handle->sock, &dnssock->outer); + dnssock->peer = handle->sock->peer; + dnssock->iface = handle->sock->iface; + + isc_nm_read(handle, dnslisten_readcb, dnssock); +} + +/* + * We've got a read on our underlying socket, need to check if we have + * a complete DNS packet and, if so - call the callback + */ +static void +dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg) { + isc_nmsocket_t *dnssock = (isc_nmsocket_t *) arg; + isc_sockaddr_t local; + unsigned char *base = NULL; + size_t len; + + REQUIRE(VALID_NMSOCK(dnssock)); + REQUIRE(VALID_NMHANDLE(handle)); + + if (region == NULL) { + /* Connection closed */ + atomic_store(&dnssock->closed, true); + isc_nmsocket_detach(&dnssock->outer); + isc_nmsocket_detach(&dnssock); + return; + } + + local = isc_nmhandle_localaddr(handle); + + base = 
region->base; + len = region->length; + + /* + * We have something in the buffer, we need to glue it. + */ + if (dnssock->buf_len > 0) { + size_t plen; + + if (dnssock->buf_len == 1) { + /* Make sure we have the length */ + dnssock->buf[1] = base[0]; + dnssock->buf_len = 2; + base++; + len--; + } + + /* At this point we definitely have 2 bytes there. */ + plen = ISC_MIN(len, (dnslen(dnssock->buf) + 2 - + dnssock->buf_len)); + if (plen > dnssock->buf_size) { + alloc_dnsbuf(dnssock, plen); + } + + memmove(dnssock->buf + dnssock->buf_len, base, plen); + dnssock->buf_len += plen; + base += plen; + len -= plen; + + /* Do we have a complete packet in the buffer? */ + if (dnslen(dnssock->buf) == dnssock->buf_len - 2) { + isc_nmhandle_t *dnshandle = NULL; + isc_region_t r2 = { + .base = dnssock->buf + 2, + .length = dnslen(dnssock->buf) + }; + dnshandle = isc__nmhandle_get(dnssock, NULL, &local); + atomic_store(&dnssock->processing, true); + dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg); + dnssock->buf_len = 0; + + /* + * If the recv callback wants to hold on to the + * handle, it needs to attach to it. + */ + isc_nmhandle_unref(dnshandle); + } + } + + /* + * At this point we've processed whatever was previously in the + * socket buffer. If there are more messages to be found in what + * we've read, and if we're either pipelining or not processing + * anything else, then we can process those messages now. + */ + while (len >= 2 && dnslen(base) <= len - 2 && + !(atomic_load(&dnssock->sequential) && + atomic_load(&dnssock->processing))) + { + isc_nmhandle_t *dnshandle = NULL; + isc_region_t r2 = { + .base = base + 2, + .length = dnslen(base) + }; + + len -= dnslen(base) + 2; + base += dnslen(base) + 2; + + dnshandle = isc__nmhandle_get(dnssock, NULL, &local); + atomic_store(&dnssock->processing, true); + dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg); + + /* + * If the recv callback wants to hold on to the + * handle, it needs to attach to it. 
+ */ + isc_nmhandle_unref(dnshandle); + } + + /* + * We have less than a full message remaining; it can be + * stored in the socket buffer for next time. + */ + if (len > 0) { + if (len > dnssock->buf_size) { + alloc_dnsbuf(dnssock, len); + } + + INSIST(len <= dnssock->buf_size); + memmove(dnssock->buf, base, len); + dnssock->buf_len = len; + } +} + +/* Process all complete packets out of incoming buffer */ +static void +processbuffer(isc_nmsocket_t *dnssock) { + REQUIRE(VALID_NMSOCK(dnssock)); + + /* While we have a complete packet in the buffer */ + while (dnssock->buf_len > 2 && + dnslen(dnssock->buf) <= dnssock->buf_len - 2) + { + isc_nmhandle_t *dnshandle = NULL; + isc_region_t r2 = { + .base = dnssock->buf + 2, + .length = dnslen(dnssock->buf) + }; + size_t len; + + dnshandle = isc__nmhandle_get(dnssock, NULL, NULL); + atomic_store(&dnssock->processing, true); + dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg); + + /* + * If the recv callback wants to hold on to the + * handle, it needs to attach to it. + */ + isc_nmhandle_unref(dnshandle); + + len = dnslen(dnssock->buf) + 2; + dnssock->buf_len -= len; + if (len > 0) { + memmove(dnssock->buf, dnssock->buf + len, + dnssock->buf_len); + } + + /* Check here to make sure we do the processing at least once */ + if (atomic_load(&dnssock->processing)) { + return; + } + } +} + +/* + * isc_nm_listentcpdns listens for connections and accepts + * them immediately, then calls the cb for each incoming DNS packet + * (with 2-byte length stripped) - just like for UDP packet. 
+ */ +isc_result_t +isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, + isc_nm_recv_cb_t cb, void *cbarg, + size_t extrahandlesize, isc_quota_t *quota, + isc_nmsocket_t **rv) +{ + /* A 'wrapper' socket object with outer set to true TCP socket */ + isc_nmsocket_t *dnslistensock = + isc_mem_get(mgr->mctx, sizeof(*dnslistensock)); + isc_result_t result; + + REQUIRE(VALID_NM(mgr)); + + isc__nmsocket_init(dnslistensock, mgr, isc_nm_tcpdnslistener); + dnslistensock->iface = iface; + dnslistensock->rcb.recv = cb; + dnslistensock->rcbarg = cbarg; + dnslistensock->extrahandlesize = extrahandlesize; + + /* We set dnslistensock->outer to a true listening socket */ + result = isc_nm_listentcp(mgr, iface, dnslisten_acceptcb, + dnslistensock, extrahandlesize, + quota, &dnslistensock->outer); + + atomic_store(&dnslistensock->listening, true); + *rv = dnslistensock; + return (result); +} + +void +isc_nm_tcpdns_stoplistening(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tcpdnslistener); + + atomic_store(&sock->listening, false); + atomic_store(&sock->closed, true); + + if (sock->outer != NULL) { + isc_nm_tcp_stoplistening(sock->outer); + isc_nmsocket_detach(&sock->outer); + } +} + +void +isc_nm_tcpdns_sequential(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + + if (handle->sock->type != isc_nm_tcpdnssocket || + handle->sock->outer == NULL) + { + return; + } + + /* + * We don't want pipelining on this connection. That means + * that we can launch query processing only when the previous + * one returned. + * + * The socket MUST be unpaused after the query is processed. + * This is done by isc_nm_resumeread() in tcpdnssend_cb() below. + * + * XXX: The callback is not currently executed in failure cases! 
+ */ + isc_nm_pauseread(handle->sock->outer); + atomic_store(&handle->sock->sequential, true); +} + +typedef struct tcpsend { + isc_mem_t *mctx; + isc_nmhandle_t *handle; + isc_region_t region; + isc_nmhandle_t *orighandle; + isc_nm_cb_t cb; + void *cbarg; +} tcpsend_t; + +static void +tcpdnssend_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + tcpsend_t *ts = (tcpsend_t *) cbarg; + + UNUSED(handle); + + ts->cb(ts->orighandle, result, ts->cbarg); + isc_mem_put(ts->mctx, ts->region.base, ts->region.length); + + /* + * The response was sent, if we're in sequential mode resume + * processing. + */ + if (atomic_load(&ts->orighandle->sock->sequential)) { + atomic_store(&ts->orighandle->sock->processing, false); + processbuffer(ts->orighandle->sock); + isc_nm_resumeread(handle->sock); + } + + isc_nmhandle_unref(ts->orighandle); + isc_mem_putanddetach(&ts->mctx, ts, sizeof(*ts)); +} + +/* + * isc__nm_tcp_send sends buf to a peer on a socket. + */ +isc_result_t +isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg) +{ + tcpsend_t *t = NULL; + + REQUIRE(VALID_NMHANDLE(handle)); + + isc_nmsocket_t *sock = handle->sock; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tcpdnssocket); + + if (sock->outer == NULL) { + /* The socket is closed, just issue the callback */ + cb(handle, ISC_R_FAILURE, cbarg); + return (ISC_R_NOTCONNECTED); + } + + t = isc_mem_get(sock->mgr->mctx, sizeof(*t)); + *t = (tcpsend_t) { + .cb = cb, + .cbarg = cbarg, + .handle = handle->sock->outer->tcphandle, + }; + + isc_mem_attach(sock->mgr->mctx, &t->mctx); + t->orighandle = handle; + isc_nmhandle_ref(t->orighandle); + + t->region = (isc_region_t) { + .base = isc_mem_get(t->mctx, region->length + 2), + .length = region->length + 2 + }; + + *(uint16_t *) t->region.base = htons(region->length); + memmove(t->region.base + 2, region->base, region->length); + + return (isc__nm_tcp_send(t->handle, &t->region, tcpdnssend_cb, t)); +} + 
+void +isc__nm_tcpdns_close(isc_nmsocket_t *sock) { + if (sock->outer != NULL) { + isc_nmsocket_detach(&sock->outer); + } + + atomic_store(&sock->closed, true); + isc__nmsocket_prep_destroy(sock); +} diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c new file mode 100644 index 0000000000..e14dc2dd93 --- /dev/null +++ b/lib/isc/netmgr/udp.c @@ -0,0 +1,461 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "netmgr-int.h" + +static isc_result_t +udp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, + isc_sockaddr_t *peer); + +static void +udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, + const struct sockaddr *addr, unsigned flags); + +static void +udp_send_cb(uv_udp_send_t *req, int status); + +isc_result_t +isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, + isc_nm_recv_cb_t cb, void *cbarg, + size_t extrahandlesize, isc_nmsocket_t **sockp) +{ + isc_nmsocket_t *nsock = NULL; + + REQUIRE(VALID_NM(mgr)); + + /* + * We are creating mgr->nworkers duplicated sockets, one + * socket for each worker thread. 
+ */ + nsock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t)); + isc__nmsocket_init(nsock, mgr, isc_nm_udplistener); + nsock->iface = iface; + nsock->nchildren = mgr->nworkers; + atomic_init(&nsock->rchildren, mgr->nworkers); + nsock->children = isc_mem_get(mgr->mctx, + mgr->nworkers * sizeof(*nsock)); + memset(nsock->children, 0, mgr->nworkers * sizeof(*nsock)); + + INSIST(nsock->rcb.recv == NULL && nsock->rcbarg == NULL); + nsock->rcb.recv = cb; + nsock->rcbarg = cbarg; + nsock->extrahandlesize = extrahandlesize; + + for (size_t i = 0; i < mgr->nworkers; i++) { + uint16_t family = iface->addr.type.sa.sa_family; + int res; + + isc__netievent_udplisten_t *ievent = NULL; + isc_nmsocket_t *csock = &nsock->children[i]; + + isc__nmsocket_init(csock, mgr, isc_nm_udpsocket); + csock->parent = nsock; + csock->iface = iface; + csock->tid = i; + csock->extrahandlesize = extrahandlesize; + + INSIST(csock->rcb.recv == NULL && csock->rcbarg == NULL); + csock->rcb.recv = cb; + csock->rcbarg = cbarg; + csock->fd = socket(family, SOCK_DGRAM, 0); + INSIST(csock->fd >= 0); + + /* + * This is SO_REUSE**** hell: + * On Linux SO_REUSEPORT allows multiple sockets to bind to + * the same host:port pair. + * On Windows the same thing is achieved with SO_REUSEADDR + */ +#ifdef WIN32 + res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEADDR, + &(int){1}, sizeof(int)); +#else + res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEPORT, + &(int){1}, sizeof(int)); +#endif + RUNTIME_CHECK(res == 0); + + ievent = isc__nm_get_ievent(mgr, netievent_udplisten); + ievent->sock = csock; + isc__nm_enqueue_ievent(&mgr->workers[i], + (isc__netievent_t *) ievent); + } + + *sockp = nsock; + return (ISC_R_SUCCESS); +} + +/* + * handle 'udplisten' async call - start listening on a socket. 
+ */ +void +isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ievent0) { + isc__netievent_udplisten_t *ievent = + (isc__netievent_udplisten_t *) ievent0; + isc_nmsocket_t *sock = ievent->sock; + + REQUIRE(sock->type == isc_nm_udpsocket); + REQUIRE(sock->iface != NULL); + REQUIRE(sock->parent != NULL); + + uv_udp_init(&worker->loop, &sock->uv_handle.udp); + sock->uv_handle.udp.data = NULL; + isc_nmsocket_attach(sock, + (isc_nmsocket_t **)&sock->uv_handle.udp.data); + + uv_udp_open(&sock->uv_handle.udp, sock->fd); + uv_udp_bind(&sock->uv_handle.udp, + &sock->parent->iface->addr.type.sa, 0); + uv_recv_buffer_size(&sock->uv_handle.handle, + &(int){16 * 1024 * 1024}); + uv_send_buffer_size(&sock->uv_handle.handle, + &(int){16 * 1024 * 1024}); + uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb, + udp_recv_cb); +} + +static void +udp_close_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = handle->data; + atomic_store(&sock->closed, true); + + isc_nmsocket_detach((isc_nmsocket_t **)&sock->uv_handle.udp.data); +} + +static void +stop_udp_child(isc_nmsocket_t *sock) { + INSIST(sock->type == isc_nm_udpsocket); + + uv_udp_recv_stop(&sock->uv_handle.udp); + uv_close((uv_handle_t *) &sock->uv_handle.udp, udp_close_cb); + + LOCK(&sock->parent->lock); + atomic_fetch_sub(&sock->parent->rchildren, 1); + UNLOCK(&sock->parent->lock); + BROADCAST(&sock->parent->cond); +} + +static void +stoplistening(isc_nmsocket_t *sock) { + /* + * Socket is already closing; there's nothing to do. 
+ */ + if (uv_is_closing((uv_handle_t *) &sock->uv_handle.udp)) { + return; + } + + INSIST(sock->type == isc_nm_udplistener); + + for (int i = 0; i < sock->nchildren; i++) { + isc__netievent_udplisten_t *event = NULL; + + if (i == sock->tid) { + stop_udp_child(&sock->children[i]); + continue; + } + + event = isc__nm_get_ievent(sock->mgr, netievent_udpstoplisten); + event->sock = &sock->children[i]; + isc__nm_enqueue_ievent(&sock->mgr->workers[i], + (isc__netievent_t *) event); + } + + LOCK(&sock->lock); + while (atomic_load_relaxed(&sock->rchildren) > 0) { + WAIT(&sock->cond, &sock->lock); + } + atomic_store(&sock->closed, true); + UNLOCK(&sock->lock); + + isc__nmsocket_prep_destroy(sock); +} + +void +isc_nm_udp_stoplistening(isc_nmsocket_t *sock) { + isc__netievent_udpstoplisten_t *ievent = NULL; + + /* We can't be launched from network thread, we'd deadlock */ + REQUIRE(!isc__nm_in_netthread()); + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_udplistener); + + /* + * If the manager is interlocked, re-enqueue this as an asynchronous + * event. Otherwise, go ahead and stop listening right away. + */ + if (!isc__nm_acquire_interlocked(sock->mgr)) { + ievent = isc__nm_get_ievent(sock->mgr, netievent_udpstoplisten); + ievent->sock = sock; + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); + } else { + stoplistening(sock); + isc__nm_drop_interlocked(sock->mgr); + } +} + +/* + * handle 'udpstoplisten' async call - stop listening on a socket. + */ +void +isc__nm_async_udpstoplisten(isc__networker_t *worker, + isc__netievent_t *ievent0) +{ + isc__netievent_udplisten_t *ievent = + (isc__netievent_udplisten_t *) ievent0; + isc_nmsocket_t *sock = ievent->sock; + + REQUIRE(sock->iface != NULL); + UNUSED(worker); + + /* + * If this is a child socket, stop listening and return. + */ + if (sock->parent != NULL) { + stop_udp_child(sock); + return; + } + + /* + * If network manager is paused, re-enqueue the event for later. 
+ */ + if (!isc__nm_acquire_interlocked(sock->mgr)) { + isc__netievent_udplisten_t *event = NULL; + + event = isc__nm_get_ievent(sock->mgr, netievent_udpstoplisten); + event->sock = sock; + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) event); + } else { + stoplistening(sock); + isc__nm_drop_interlocked(sock->mgr); + } +} + +/* + * udp_recv_cb handles incoming UDP packet from uv. + * The buffer here is reused for a series of packets, + * so we need to allocate a new one. This new one can + * be reused to send the response then. + */ +static void +udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, + const struct sockaddr *addr, unsigned flags) +{ + isc_result_t result; + isc_nmhandle_t *nmhandle = NULL; + isc_sockaddr_t sockaddr; + isc_sockaddr_t localaddr; + struct sockaddr_storage laddr; + isc_nmsocket_t *sock = (isc_nmsocket_t *) handle->data; + isc_region_t region; + uint32_t maxudp; + + REQUIRE(VALID_NMSOCK(sock)); + + /* XXXWPK TODO handle it! */ + UNUSED(flags); + + /* + * If addr == NULL that's the end of stream - we can + * free the buffer and bail. + */ + if (addr == NULL) { + isc__nm_free_uvbuf(sock, buf); + return; + } + + /* + * Simulate a firewall blocking UDP packets bigger than + * 'maxudp' bytes. 
+ */ + maxudp = atomic_load(&sock->mgr->maxudp); + if (maxudp != 0 && (uint32_t)nrecv > maxudp) { + return; + } + + result = isc_sockaddr_fromsockaddr(&sockaddr, addr); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + uv_udp_getsockname(handle, (struct sockaddr *) &laddr, + &(int){sizeof(struct sockaddr_storage)}); + result = isc_sockaddr_fromsockaddr(&localaddr, + (struct sockaddr *) &laddr); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + + nmhandle = isc__nmhandle_get(sock, &sockaddr, &localaddr); + region.base = (unsigned char *) buf->base; + region.length = nrecv; + + INSIST(sock->rcb.recv != NULL); + sock->rcb.recv(nmhandle, &region, sock->rcbarg); + isc__nm_free_uvbuf(sock, buf); + + /* + * If the recv callback wants to hold on to the handle, + * it needs to attach to it. + */ + isc_nmhandle_unref(nmhandle); +} + +/* + * isc__nm_udp_send sends buf to a peer on a socket. + * It tries to find a proper sibling/child socket so that we won't have + * to jump to other thread. + */ +isc_result_t +isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, + isc_nm_cb_t cb, void *cbarg) +{ + isc_nmsocket_t *psock = NULL, *rsock = NULL; + isc_nmsocket_t *sock = handle->sock; + isc_sockaddr_t *peer = &handle->peer; + isc__netievent_udpsend_t *ievent; + isc__nm_uvreq_t *uvreq = NULL; + int ntid; + uint32_t maxudp = atomic_load(&sock->mgr->maxudp); + + /* + * Simulate a firewall blocking UDP packets bigger than + * 'maxudp' bytes. 
+ */ + if (maxudp != 0 && region->length > maxudp) { + isc_nmhandle_unref(handle); + return (ISC_R_SUCCESS); + } + + if (sock->type == isc_nm_udpsocket) { + INSIST(sock->parent != NULL); + psock = sock->parent; + } else if (sock->type == isc_nm_udplistener) { + psock = sock; + } else { + isc_nmhandle_unref(handle); + return (ISC_R_UNEXPECTED); + } + + if (isc__nm_in_netthread()) { + ntid = isc_nm_tid(); + } else { + ntid = (int) isc_random_uniform(sock->nchildren); + } + + rsock = &psock->children[ntid]; + + uvreq = isc__nm_uvreq_get(sock->mgr, sock); + uvreq->uvbuf.base = (char *) region->base; + uvreq->uvbuf.len = region->length; + + uvreq->handle = handle; + isc_nmhandle_ref(uvreq->handle); + + uvreq->cb.send = cb; + uvreq->cbarg = cbarg; + + if (isc_nm_tid() == rsock->tid) { + /* + * If we're in the same thread as the socket we can send the + * data directly + */ + return (udp_send_direct(rsock, uvreq, peer)); + } else { + /* + * We need to create an event and pass it using async channel + */ + ievent = isc__nm_get_ievent(sock->mgr, netievent_udpsend); + ievent->sock = rsock; + ievent->peer = *peer; + ievent->req = uvreq; + + isc__nm_enqueue_ievent(&sock->mgr->workers[rsock->tid], + (isc__netievent_t *) ievent); + return (ISC_R_SUCCESS); + } +} + + +/* + * handle 'udpsend' async event - send a packet on the socket + */ +void +isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ievent0) { + isc__netievent_udpsend_t *ievent = + (isc__netievent_udpsend_t *) ievent0; + + REQUIRE(worker->id == ievent->sock->tid); + + if (atomic_load(&ievent->sock->active)) { + udp_send_direct(ievent->sock, ievent->req, &ievent->peer); + } else { + ievent->req->cb.send(ievent->req->handle, + ISC_R_CANCELED, ievent->req->cbarg); + isc__nm_uvreq_put(&ievent->req, ievent->req->sock); + } +} + +/* + * udp_send_cb - callback + */ +static void +udp_send_cb(uv_udp_send_t *req, int status) { + isc_result_t result = ISC_R_SUCCESS; + isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t 
*)req->data; + + REQUIRE(VALID_UVREQ(uvreq)); + REQUIRE(VALID_NMHANDLE(uvreq->handle)); + + if (status < 0) { + result = isc__nm_uverr2result(status); + } + + uvreq->cb.send(uvreq->handle, result, uvreq->cbarg); + isc_nmhandle_unref(uvreq->handle); + isc__nm_uvreq_put(&uvreq, uvreq->sock); +} + +/* + * udp_send_direct sends buf to a peer on a socket. Sock has to be in + * the same thread as the callee. + */ +static isc_result_t +udp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, + isc_sockaddr_t *peer) +{ + int rv; + + REQUIRE(sock->tid == isc_nm_tid()); + REQUIRE(sock->type == isc_nm_udpsocket); + + isc_nmhandle_ref(req->handle); + rv = uv_udp_send(&req->uv_req.udp_send, + &sock->uv_handle.udp, &req->uvbuf, 1, + &peer->type.sa, udp_send_cb); + if (rv < 0) { + return (isc__nm_uverr2result(rv)); + } + + return (ISC_R_SUCCESS); +} diff --git a/lib/isc/netmgr/uverr2result.c b/lib/isc/netmgr/uverr2result.c new file mode 100644 index 0000000000..09a3d6ecd0 --- /dev/null +++ b/lib/isc/netmgr/uverr2result.c @@ -0,0 +1,91 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "netmgr-int.h" + +/*% + * Convert a libuv error value into an isc_result_t. The + * list of supported error values is not complete; new users + * of this function should add any expected errors that are + * not already there. 
+ */ +isc_result_t +isc___nm_uverr2result(int uverr, bool dolog, + const char *file, unsigned int line) +{ + switch (uverr) { + case UV_ENOTDIR: + case UV_ELOOP: + case UV_EINVAL: /* XXX sometimes this is not for files */ + case UV_ENAMETOOLONG: + case UV_EBADF: + return (ISC_R_INVALIDFILE); + case UV_ENOENT: + return (ISC_R_FILENOTFOUND); + case UV_EACCES: + case UV_EPERM: + return (ISC_R_NOPERM); + case UV_EEXIST: + return (ISC_R_FILEEXISTS); + case UV_EIO: + return (ISC_R_IOERROR); + case UV_ENOMEM: + return (ISC_R_NOMEMORY); + case UV_ENFILE: + case UV_EMFILE: + return (ISC_R_TOOMANYOPENFILES); + case UV_ENOSPC: + return (ISC_R_DISCFULL); + case UV_EPIPE: + case UV_ECONNRESET: + case UV_ECONNABORTED: + return (ISC_R_CONNECTIONRESET); + case UV_ENOTCONN: + return (ISC_R_NOTCONNECTED); + case UV_ETIMEDOUT: + return (ISC_R_TIMEDOUT); + case UV_ENOBUFS: + return (ISC_R_NORESOURCES); + case UV_EAFNOSUPPORT: + return (ISC_R_FAMILYNOSUPPORT); + case UV_ENETDOWN: + return (ISC_R_NETDOWN); + case UV_EHOSTDOWN: + return (ISC_R_HOSTDOWN); + case UV_ENETUNREACH: + return (ISC_R_NETUNREACH); + case UV_EHOSTUNREACH: + return (ISC_R_HOSTUNREACH); + case UV_EADDRINUSE: + return (ISC_R_ADDRINUSE); + case UV_EADDRNOTAVAIL: + return (ISC_R_ADDRNOTAVAIL); + case UV_ECONNREFUSED: + return (ISC_R_CONNREFUSED); + default: + if (dolog) { + UNEXPECTED_ERROR(file, line, + "unable to convert libuv " + "error code to isc_result: %d: %s", + uverr, uv_strerror(uverr)); + } + return (ISC_R_UNEXPECTED); + } +} diff --git a/lib/isc/queue.c b/lib/isc/queue.c new file mode 100644 index 0000000000..1729079906 --- /dev/null +++ b/lib/isc/queue.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#define BUFFER_SIZE 1024 + +#define MAX_THREADS 128 + +static uintptr_t nulluintptr = (uintptr_t)NULL; + +typedef struct node { + atomic_uint_fast32_t deqidx; + atomic_uintptr_t items[BUFFER_SIZE]; + atomic_uint_fast32_t enqidx; + atomic_uintptr_t next; + isc_mem_t *mctx; +} node_t; + +/* we just need one Hazard Pointer */ +#define HP_TAIL 0 +#define HP_HEAD 0 + +struct isc_queue { + alignas(128) atomic_uintptr_t head; + alignas(128) atomic_uintptr_t tail; + isc_mem_t *mctx; + int max_threads; + int taken; + isc_hp_t *hp; +}; + +static node_t * +node_new(isc_mem_t *mctx, uintptr_t item) { + node_t *node = isc_mem_get(mctx, sizeof(*node)); + *node = (node_t){ + .mctx = NULL + }; + + atomic_init(&node->deqidx, 0); + atomic_init(&node->enqidx, 1); + atomic_init(&node->next, 0); + atomic_init(&node->items[0], item); + + for (int i = 1; i < BUFFER_SIZE; i++) { + atomic_init(&node->items[i], 0); + } + + isc_mem_attach(mctx, &node->mctx); + + return (node); +} + +static void +node_destroy(void *node0) { + node_t *node = (node_t *)node0; + + isc_mem_putanddetach(&node->mctx, node, sizeof(*node)); +} + +static bool +node_cas_next(node_t *node, node_t *cmp, const node_t *val) { + return (atomic_compare_exchange_strong(&node->next, + (uintptr_t *)&cmp, + (uintptr_t)val)); +} + +static bool +queue_cas_tail(isc_queue_t *queue, node_t *cmp, const node_t *val) { + return (atomic_compare_exchange_strong(&queue->tail, + (uintptr_t *)&cmp, + (uintptr_t)val)); +} + +static bool +queue_cas_head(isc_queue_t *queue, node_t *cmp, const node_t *val) { + return (atomic_compare_exchange_strong(&queue->head, + (uintptr_t *)&cmp, + (uintptr_t)val)); +} + +isc_queue_t * +isc_queue_new(isc_mem_t *mctx, int max_threads) { + isc_queue_t *queue = isc_mem_get(mctx, sizeof(*queue)); + node_t *sentinel 
= node_new(mctx, nulluintptr); + + if (max_threads == 0) { + max_threads = MAX_THREADS; + } + + *queue = (isc_queue_t){ + .max_threads = max_threads, + }; + + isc_mem_attach(mctx, &queue->mctx); + + queue->hp = isc_hp_new(mctx, 1, node_destroy); + + atomic_init(&sentinel->enqidx, 0); + atomic_init(&queue->head, (uintptr_t)sentinel); + atomic_init(&queue->tail, (uintptr_t)sentinel); + + return (queue); +} + +void +isc_queue_enqueue(isc_queue_t *queue, uintptr_t item) { + REQUIRE(item != nulluintptr); + + while (true) { + node_t *lt = NULL; + uint_fast32_t idx; + uintptr_t n = nulluintptr; + + lt = (node_t *)isc_hp_protect(queue->hp, 0, &queue->tail); + idx = atomic_fetch_add(&lt->enqidx, 1); + if (idx > BUFFER_SIZE-1) { + node_t *lnext = NULL; + + if (lt != (node_t *)atomic_load(&queue->tail)) { + continue; + } + + lnext = (node_t *)atomic_load(&lt->next); + if (lnext == NULL) { + node_t *newnode = node_new(queue->mctx, item); + if (node_cas_next(lt, NULL, newnode)) { + queue_cas_tail(queue, lt, newnode); + isc_hp_clear(queue->hp); + return; + } + node_destroy(newnode); + } else { + queue_cas_tail(queue, lt, lnext); + } + + continue; + } + + if (atomic_compare_exchange_strong(&lt->items[idx], &n, item)) { + isc_hp_clear(queue->hp); + return; + } + } +} + +uintptr_t +isc_queue_dequeue(isc_queue_t *queue) { + REQUIRE(queue != NULL); + + while (true) { + node_t *lh = NULL; + uint_fast32_t idx; + uintptr_t item; + + lh = (node_t *)isc_hp_protect(queue->hp, 0, &queue->head); + if (atomic_load(&lh->deqidx) >= atomic_load(&lh->enqidx) && + atomic_load(&lh->next) == nulluintptr) + { + break; + } + + idx = atomic_fetch_add(&lh->deqidx, 1); + if (idx > BUFFER_SIZE-1) { + node_t *lnext = (node_t *)atomic_load(&lh->next); + if (lnext == NULL) { + break; + } + if (queue_cas_head(queue, lh, lnext)) { + isc_hp_retire(queue->hp, (uintptr_t)lh); + } + + continue; + } + + item = atomic_exchange(&(lh->items[idx]), + (uintptr_t)&queue->taken); + if (item == nulluintptr) { + continue; + } + + 
isc_hp_clear(queue->hp); + return (item); + } + + isc_hp_clear(queue->hp); + return (nulluintptr); +} + +void +isc_queue_destroy(isc_queue_t *queue) { + node_t *last = NULL; + + REQUIRE(queue != NULL); + + while (isc_queue_dequeue(queue) != nulluintptr) { + /* do nothing */ + } + + last = (node_t *)atomic_load_relaxed(&queue->head); + node_destroy(last); + isc_hp_destroy(queue->hp); + isc_mem_putanddetach(&queue->mctx, queue, sizeof(*queue)); +} diff --git a/lib/isc/sockaddr.c b/lib/isc/sockaddr.c index 832be1c2ce..19331281e7 100644 --- a/lib/isc/sockaddr.c +++ b/lib/isc/sockaddr.c @@ -472,3 +472,33 @@ isc_sockaddr_frompath(isc_sockaddr_t *sockaddr, const char *path) { return (ISC_R_NOTIMPLEMENTED); #endif } + +isc_result_t +isc_sockaddr_fromsockaddr(isc_sockaddr_t *isa, const struct sockaddr *sa) { + unsigned int length = 0; + + switch (sa->sa_family) { + case AF_INET: + length = sizeof(isa->type.sin); + break; + case AF_INET6: + length = sizeof(isa->type.sin6); + break; +#ifdef ISC_PLATFORM_HAVESYSUNH + case AF_UNIX: + length = sizeof(isa->type.sunix); + break; +#endif + default: + return (ISC_R_NOTIMPLEMENTED); + } + if (length == 0) { + return (ISC_R_NOTIMPLEMENTED); + } + + memset(isa, 0, sizeof(isc_sockaddr_t)); + memcpy(isa, sa, length); + isa->length = length; + + return (ISC_R_SUCCESS); +} diff --git a/lib/isc/task.c b/lib/isc/task.c index 88e5fb1d83..81009538c0 100644 --- a/lib/isc/task.c +++ b/lib/isc/task.c @@ -78,8 +78,8 @@ ***/ typedef enum { - task_state_idle, task_state_ready, task_state_running, - task_state_done + task_state_idle, task_state_ready, task_state_paused, + task_state_running, task_state_done } task_state_t; #if defined(HAVE_LIBXML2) || defined(HAVE_JSON_C) @@ -155,6 +155,7 @@ struct isc__taskmgr { atomic_uint_fast32_t curq; atomic_uint_fast32_t tasks_count; isc__taskqueue_t *queues; + isc_nm_t *nm; /* Locked by task manager lock. 
*/ unsigned int default_quantum; @@ -370,6 +371,7 @@ task_shutdown(isc__task_t *task) { was_idle = true; } INSIST(task->state == task_state_ready || + task->state == task_state_paused || task->state == task_state_running); /* @@ -405,7 +407,8 @@ task_ready(isc__task_t *task) { LOCK(&manager->queues[task->threadid].lock); push_readyq(manager, task, task->threadid); if (atomic_load(&manager->mode) == isc_taskmgrmode_normal || - has_privilege) { + has_privilege) + { SIGNAL(&manager->queues[task->threadid].work_available); } UNLOCK(&manager->queues[task->threadid].lock); @@ -489,7 +492,8 @@ task_send(isc__task_t *task, isc_event_t **eventp, int c) { task->state = task_state_ready; } INSIST(task->state == task_state_ready || - task->state == task_state_running); + task->state == task_state_running || + task->state == task_state_paused); ENQUEUE(task->events, event, ev_link); task->nevents++; *eventp = NULL; @@ -1132,12 +1136,15 @@ dispatch(isc__taskmgr_t *manager, unsigned int threadid) { event); LOCK(&task->lock); } + XTRACE("execution complete"); dispatch_count++; } - if (isc_refcount_current(&task->references) == 0 && + if (isc_refcount_current( + &task->references) == 0 && EMPTY(task->events) && - !TASK_SHUTTINGDOWN(task)) { + !TASK_SHUTTINGDOWN(task)) + { bool was_idle; /* @@ -1172,16 +1179,19 @@ dispatch(isc__taskmgr_t *manager, unsigned int threadid) { * right now. */ XTRACE("empty"); - if (isc_refcount_current(&task->references) == 0 && - TASK_SHUTTINGDOWN(task)) { + if (isc_refcount_current( + &task->references) == 0 && + TASK_SHUTTINGDOWN(task)) + { /* * The task is done. 
*/ XTRACE("done"); finished = true; task->state = task_state_done; - } else + } else { task->state = task_state_idle; + } done = true; } else if (dispatch_count >= task->quantum) { /* @@ -1323,7 +1333,8 @@ manager_free(isc__taskmgr_t *manager) { isc_result_t isc_taskmgr_create(isc_mem_t *mctx, unsigned int workers, - unsigned int default_quantum, isc_taskmgr_t **managerp) + unsigned int default_quantum, + isc_nm_t *nm, isc_taskmgr_t **managerp) { unsigned int i; isc__taskmgr_t *manager; @@ -1336,11 +1347,12 @@ isc_taskmgr_create(isc_mem_t *mctx, unsigned int workers, REQUIRE(managerp != NULL && *managerp == NULL); manager = isc_mem_get(mctx, sizeof(*manager)); - RUNTIME_CHECK(manager != NULL); - manager->common.impmagic = TASK_MANAGER_MAGIC; - manager->common.magic = ISCAPI_TASKMGR_MAGIC; + *manager = (isc__taskmgr_t) { + .common.impmagic = TASK_MANAGER_MAGIC, + .common.magic = ISCAPI_TASKMGR_MAGIC + }; + atomic_store(&manager->mode, isc_taskmgrmode_normal); - manager->mctx = NULL; isc_mutex_init(&manager->lock); isc_mutex_init(&manager->excl_lock); @@ -1353,6 +1365,11 @@ isc_taskmgr_create(isc_mem_t *mctx, unsigned int workers, default_quantum = DEFAULT_DEFAULT_QUANTUM; } manager->default_quantum = default_quantum; + + if (nm != NULL) { + isc_nm_attach(nm, &manager->nm); + } + INIT_LIST(manager->tasks); atomic_store(&manager->tasks_count, 0); manager->queues = isc_mem_get(mctx, @@ -1363,8 +1380,6 @@ isc_taskmgr_create(isc_mem_t *mctx, unsigned int workers, atomic_init(&manager->tasks_ready, 0); atomic_init(&manager->curq, 0); atomic_init(&manager->exiting, false); - manager->excl = NULL; - manager->halted = 0; atomic_store_relaxed(&manager->exclusive_req, false); atomic_store_relaxed(&manager->pause_req, false); @@ -1485,6 +1500,13 @@ isc_taskmgr_destroy(isc_taskmgr_t **managerp) { isc_thread_join(manager->queues[i].thread, NULL); } + /* + * Detach from the network manager if it was set. 
+ */ + if (manager->nm != NULL) { + isc_nm_detach(&manager->nm); + } + manager_free(manager); *managerp = NULL; @@ -1601,6 +1623,9 @@ isc_task_beginexclusive(isc_task_t *task0) { WAIT(&manager->halt_cond, &manager->halt_lock); } UNLOCK(&manager->halt_lock); + if (manager->nm != NULL) { + isc_nm_pause(manager->nm); + } return (ISC_R_SUCCESS); } @@ -1611,9 +1636,11 @@ isc_task_endexclusive(isc_task_t *task0) { REQUIRE(VALID_TASK(task)); REQUIRE(task->state == task_state_running); - manager = task->manager; + if (manager->nm != NULL) { + isc_nm_resume(manager->nm); + } LOCK(&manager->halt_lock); REQUIRE(atomic_load_relaxed(&manager->exclusive_req) == true); atomic_store_relaxed(&manager->exclusive_req, false); @@ -1624,6 +1651,55 @@ isc_task_endexclusive(isc_task_t *task0) { UNLOCK(&manager->halt_lock); } +void +isc_task_pause(isc_task_t *task0) { + REQUIRE(ISCAPI_TASK_VALID(task0)); + isc__task_t *task = (isc__task_t *)task0; + isc__taskmgr_t *manager = task->manager; + bool running = false; + + LOCK(&task->lock); + INSIST(task->state == task_state_idle || + task->state == task_state_ready || + task->state == task_state_running); + running = (task->state == task_state_running); + task->state = task_state_paused; + UNLOCK(&task->lock); + + if (running) { + return; + } + + LOCK(&manager->queues[task->threadid].lock); + if (ISC_LINK_LINKED(task, ready_link)) { + DEQUEUE(manager->queues[task->threadid].ready_tasks, + task, ready_link); + } + UNLOCK(&manager->queues[task->threadid].lock); +} + +void +isc_task_unpause(isc_task_t *task0) { + isc__task_t *task = (isc__task_t *)task0; + bool was_idle = false; + + REQUIRE(ISCAPI_TASK_VALID(task0)); + + LOCK(&task->lock); + INSIST(task->state == task_state_paused); + if (!EMPTY(task->events)) { + task->state = task_state_ready; + was_idle = true; + } else { + task->state = task_state_idle; + } + UNLOCK(&task->lock); + + if (was_idle) { + task_ready(task); + } +} + void isc_task_setprivilege(isc_task_t *task0, bool priv) { 
REQUIRE(ISCAPI_TASK_VALID(task0)); @@ -1889,8 +1965,8 @@ isc_taskmgr_createinctx(isc_mem_t *mctx, { isc_result_t result; - result = isc_taskmgr_create(mctx, workers, default_quantum, - managerp); + result = isc_taskmgr_create(mctx, workers, default_quantum, NULL, + managerp); return (result); } diff --git a/lib/isc/tests/Kyuafile b/lib/isc/tests/Kyuafile index 4f25e19392..834261b321 100644 --- a/lib/isc/tests/Kyuafile +++ b/lib/isc/tests/Kyuafile @@ -16,7 +16,6 @@ tap_test_program{name='mem_test'} tap_test_program{name='netaddr_test'} tap_test_program{name='parse_test'} tap_test_program{name='pool_test'} -tap_test_program{name='queue_test'} tap_test_program{name='radix_test'} tap_test_program{name='regex_test'} tap_test_program{name='result_test'} diff --git a/lib/isc/tests/Makefile.in b/lib/isc/tests/Makefile.in index abeca57a62..046b8dcff3 100644 --- a/lib/isc/tests/Makefile.in +++ b/lib/isc/tests/Makefile.in @@ -33,7 +33,7 @@ SRCS = isctest.c aes_test.c buffer_test.c \ counter_test.c crc64_test.c errno_test.c file_test.c hash_test.c \ heap_test.c hmac_test.c ht_test.c lex_test.c \ mem_test.c md_test.c netaddr_test.c parse_test.c pool_test.c \ - queue_test.c radix_test.c random_test.c \ + radix_test.c random_test.c \ regex_test.c result_test.c safe_test.c siphash_test.c sockaddr_test.c \ socket_test.c socket_test.c symtab_test.c task_test.c \ taskpool_test.c time_test.c timer_test.c @@ -46,7 +46,7 @@ TARGETS = aes_test@EXEEXT@ buffer_test@EXEEXT@ \ ht_test@EXEEXT@ \ lex_test@EXEEXT@ mem_test@EXEEXT@ md_test@EXEEXT@ \ netaddr_test@EXEEXT@ parse_test@EXEEXT@ pool_test@EXEEXT@ \ - queue_test@EXEEXT@ radix_test@EXEEXT@ \ + radix_test@EXEEXT@ \ random_test@EXEEXT@ regex_test@EXEEXT@ result_test@EXEEXT@ \ safe_test@EXEEXT@ siphash_test@EXEEXT@ sockaddr_test@EXEEXT@ socket_test@EXEEXT@ \ socket_test@EXEEXT@ symtab_test@EXEEXT@ task_test@EXEEXT@ \ @@ -134,11 +134,6 @@ pool_test@EXEEXT@: pool_test.@O@ isctest.@O@ ${ISCDEPLIBS} ${LDFLAGS} -o $@ pool_test.@O@ isctest.@O@ \ 
${ISCLIBS} ${LIBS} -queue_test@EXEEXT@: queue_test.@O@ isctest.@O@ ${ISCDEPLIBS} - ${LIBTOOL_MODE_LINK} ${PURIFY} ${CC} ${CFLAGS} \ - ${LDFLAGS} -o $@ queue_test.@O@ isctest.@O@ \ - ${ISCLIBS} ${LIBS} - radix_test@EXEEXT@: radix_test.@O@ isctest.@O@ ${ISCDEPLIBS} ${LIBTOOL_MODE_LINK} ${PURIFY} ${CC} ${CFLAGS} \ ${LDFLAGS} -o $@ radix_test.@O@ isctest.@O@ \ diff --git a/lib/isc/tests/isctest.c b/lib/isc/tests/isctest.c index ed5801b56e..08ced36b30 100644 --- a/lib/isc/tests/isctest.c +++ b/lib/isc/tests/isctest.c @@ -33,6 +33,7 @@ isc_log_t *lctx = NULL; isc_taskmgr_t *taskmgr = NULL; isc_timermgr_t *timermgr = NULL; isc_socketmgr_t *socketmgr = NULL; +isc_nm_t *netmgr = NULL; isc_task_t *maintask = NULL; int ncpus; @@ -55,6 +56,9 @@ static isc_logcategory_t categories[] = { static void cleanup_managers(void) { + if (netmgr != NULL) { + isc_nm_detach(&netmgr); + } if (maintask != NULL) { isc_task_shutdown(maintask); isc_task_destroy(&maintask); @@ -84,12 +88,13 @@ create_managers(unsigned int workers) { workers = atoi(p); } - CHECK(isc_taskmgr_create(mctx, workers, 0, &taskmgr)); + CHECK(isc_taskmgr_create(mctx, workers, 0, NULL, &taskmgr)); CHECK(isc_task_create(taskmgr, 0, &maintask)); isc_taskmgr_setexcltask(taskmgr, maintask); CHECK(isc_timermgr_create(mctx, &timermgr)); CHECK(isc_socketmgr_create(mctx, &socketmgr)); + netmgr = isc_nm_start(mctx, 3); return (ISC_R_SUCCESS); cleanup: diff --git a/lib/isc/tests/isctest.h b/lib/isc/tests/isctest.h index 481e7038a8..c89996721a 100644 --- a/lib/isc/tests/isctest.h +++ b/lib/isc/tests/isctest.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ extern isc_log_t *lctx; extern isc_taskmgr_t *taskmgr; extern isc_timermgr_t *timermgr; extern isc_socketmgr_t *socketmgr; +extern isc_nm_t *netmgr; extern int ncpus; isc_result_t diff --git a/lib/isc/tests/queue_test.c b/lib/isc/tests/queue_test.c deleted file mode 100644 index 89ed2d4e23..0000000000 --- 
a/lib/isc/tests/queue_test.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (C) Internet Systems Consortium, Inc. ("ISC") - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * See the COPYRIGHT file distributed with this work for additional - * information regarding copyright ownership. - */ - -#if HAVE_CMOCKA - -#include -#include -#include - -#include /* IWYU pragma: keep */ -#include -#include -#include -#include - -#define UNIT_TESTING -#include - -#include -#include - -#include "isctest.h" - -static int -_setup(void **state) { - isc_result_t result; - - UNUSED(state); - - result = isc_test_begin(NULL, true, 0); - assert_int_equal(result, ISC_R_SUCCESS); - - return (0); -} - -static int -_teardown(void **state) { - UNUSED(state); - - isc_test_end(); - - return (0); -} - -typedef struct item item_t; -struct item { - int value; - ISC_QLINK(item_t) qlink; -}; - -typedef ISC_QUEUE(item_t) item_queue_t; - -static void -item_init(item_t *item, int value) { - item->value = value; - ISC_QLINK_INIT(item, qlink); -} - -/* Test UDP sendto/recv (IPv4) */ -static void -queue_valid(void **state) { - item_queue_t queue; - item_t one, two, three, four, five; - item_t *p; - - UNUSED(state); - - ISC_QUEUE_INIT(queue, qlink); - - item_init(&one, 1); - item_init(&two, 2); - item_init(&three, 3); - item_init(&four, 4); - item_init(&five, 5); - - assert_true(ISC_QUEUE_EMPTY(queue)); - - ISC_QUEUE_POP(queue, qlink, p); - assert_null(p); - - assert_false(ISC_QLINK_LINKED(&one, qlink)); - ISC_QUEUE_PUSH(queue, &one, qlink); - assert_true(ISC_QLINK_LINKED(&one, qlink)); - - assert_false(ISC_QUEUE_EMPTY(queue)); - - ISC_QUEUE_POP(queue, qlink, p); - assert_non_null(p); - assert_int_equal(p->value, 1); - assert_true(ISC_QUEUE_EMPTY(queue)); - assert_false(ISC_QLINK_LINKED(p, qlink)); - - ISC_QUEUE_PUSH(queue, p, qlink); - 
assert_false(ISC_QUEUE_EMPTY(queue)); - assert_true(ISC_QLINK_LINKED(p, qlink)); - - assert_false(ISC_QLINK_LINKED(&two, qlink)); - ISC_QUEUE_PUSH(queue, &two, qlink); - assert_true(ISC_QLINK_LINKED(&two, qlink)); - - assert_false(ISC_QLINK_LINKED(&three, qlink)); - ISC_QUEUE_PUSH(queue, &three, qlink); - assert_true(ISC_QLINK_LINKED(&three, qlink)); - - assert_false(ISC_QLINK_LINKED(&four, qlink)); - ISC_QUEUE_PUSH(queue, &four, qlink); - assert_true(ISC_QLINK_LINKED(&four, qlink)); - - assert_false(ISC_QLINK_LINKED(&five, qlink)); - ISC_QUEUE_PUSH(queue, &five, qlink); - assert_true(ISC_QLINK_LINKED(&five, qlink)); - - /* Test unlink by removing one item from the middle */ - ISC_QUEUE_UNLINK(queue, &three, qlink); - - ISC_QUEUE_POP(queue, qlink, p); - assert_non_null(p); - assert_int_equal(p->value, 1); - - ISC_QUEUE_POP(queue, qlink, p); - assert_non_null(p); - assert_int_equal(p->value, 2); - - ISC_QUEUE_POP(queue, qlink, p); - assert_non_null(p); - assert_int_equal(p->value, 4); - - ISC_QUEUE_POP(queue, qlink, p); - assert_non_null(p); - assert_int_equal(p->value, 5); - - assert_null(queue.head); - assert_null(queue.tail); - assert_true(ISC_QUEUE_EMPTY(queue)); - - ISC_QUEUE_DESTROY(queue); -} - -int -main(void) { - const struct CMUnitTest tests[] = { - cmocka_unit_test_setup_teardown(queue_valid, - _setup, _teardown), - }; - - return (cmocka_run_group_tests(tests, NULL, NULL)); -} - -#else /* HAVE_CMOCKA */ - -#include - -int -main(void) { - printf("1..0 # Skipped: cmocka not available\n"); - return (0); -} - -#endif diff --git a/lib/isc/tests/task_test.c b/lib/isc/tests/task_test.c index 282dab035b..9b15897a94 100644 --- a/lib/isc/tests/task_test.c +++ b/lib/isc/tests/task_test.c @@ -734,7 +734,7 @@ manytasks(void **state) { isc_mem_debugging = ISC_MEM_DEBUGRECORD; isc_mem_create(&mctx); - result = isc_taskmgr_create(mctx, 4, 0, &taskmgr); + result = isc_taskmgr_create(mctx, 4, 0, NULL, &taskmgr); assert_int_equal(result, ISC_R_SUCCESS); atomic_init(&done, 
false); diff --git a/lib/isc/unix/include/isc/align.h b/lib/isc/unix/include/isc/align.h new file mode 100644 index 0000000000..9cadbec345 --- /dev/null +++ b/lib/isc/unix/include/isc/align.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#pragma once + +#ifdef HAVE_STDALIGN_H +#include <stdalign.h> +#else +#define alignas(x) __attribute__ ((__aligned__ (x))) +#endif diff --git a/lib/isc/unix/include/isc/stdatomic.h b/lib/isc/unix/include/isc/stdatomic.h index f0909d232b..8e423a961b 100644 --- a/lib/isc/unix/include/isc/stdatomic.h +++ b/lib/isc/unix/include/isc/stdatomic.h @@ -80,6 +80,8 @@ typedef int_fast64_t atomic_int_fast64_t; typedef uint_fast64_t atomic_uint_fast64_t; typedef bool atomic_bool; +typedef uint_fast64_t atomic_uintptr_t; + #if defined(__CLANG_ATOMICS) /* __c11_atomic builtins */ #define atomic_init(obj, desired) \ __c11_atomic_init(obj, desired) @@ -99,6 +101,8 @@ typedef bool atomic_bool; __c11_atomic_compare_exchange_strong_explicit(obj, expected, desired, succ, fail) #define atomic_compare_exchange_weak_explicit(obj, expected, desired, succ, fail) \ __c11_atomic_compare_exchange_weak_explicit(obj, expected, desired, succ, fail) +#define atomic_exchange_explicit(obj, desired, order) \ + __c11_atomic_exchange_explicit(obj, desired, order) #elif defined(__GNUC_ATOMICS) /* __atomic builtins */ #define atomic_init(obj, desired) \ (*obj = desired) @@ -118,6 +122,8 @@ typedef bool atomic_bool; __atomic_compare_exchange_n(obj, expected, desired, 0, succ, fail) #define atomic_compare_exchange_weak_explicit(obj, expected, desired, succ, fail) \ __atomic_compare_exchange_n(obj, expected, desired, 1,
succ, fail) +#define atomic_exchange_explicit(obj, desired, order) \ + __atomic_exchange_n(obj, desired, order) #else /* __sync builtins */ #define atomic_init(obj, desired) \ (*obj = desired) @@ -150,6 +156,9 @@ typedef bool atomic_bool; }) #define atomic_compare_exchange_weak_explicit(obj, expected, desired, succ, fail) \ atomic_compare_exchange_strong_explicit(obj, expected, desired, succ, fail) +#define atomic_exchange_explicit(obj, desired, order) \ + __sync_lock_test_and_set(obj, desired) + #endif #define atomic_load(obj) \ @@ -168,3 +177,5 @@ typedef bool atomic_bool; atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_seq_cst, memory_order_seq_cst) #define atomic_compare_exchange_weak(obj, expected, desired) \ atomic_compare_exchange_weak_explicit(obj, expected, desired, memory_order_seq_cst, memory_order_seq_cst) +#define atomic_exchange(obj, desired) \ + atomic_exchange_explicit(obj, desired, memory_order_seq_cst) diff --git a/lib/isc/unix/time.c b/lib/isc/unix/time.c index f06d31a550..da25e5bf8e 100644 --- a/lib/isc/unix/time.c +++ b/lib/isc/unix/time.c @@ -36,15 +36,10 @@ #define NS_PER_MS 1000000 /*%< Nanoseconds per millisecond. */ #define US_PER_S 1000000 /*%< Microseconds per second. */ -/* - * All of the INSIST()s checks of nanoseconds < NS_PER_S are for - * consistency checking of the type. In lieu of magic numbers, it - * is the best we've got. The check is only performed on functions which - * need an initialized type. 
- */ - -#ifndef ISC_FIX_TV_USEC -#define ISC_FIX_TV_USEC 1 +#ifdef CLOCK_REALTIME_COARSE +#define CLOCKSOURCE CLOCK_REALTIME_COARSE +#else +#define CLOCKSOURCE CLOCK_REALTIME #endif /*% @@ -54,32 +49,6 @@ static const isc_interval_t zero_interval = { 0, 0 }; const isc_interval_t * const isc_interval_zero = &zero_interval; -#if ISC_FIX_TV_USEC -static inline void -fix_tv_usec(struct timeval *tv) { - bool fixed = false; - - if (tv->tv_usec < 0) { - fixed = true; - do { - tv->tv_sec -= 1; - tv->tv_usec += US_PER_S; - } while (tv->tv_usec < 0); - } else if (tv->tv_usec >= US_PER_S) { - fixed = true; - do { - tv->tv_sec += 1; - tv->tv_usec -= US_PER_S; - } while (tv->tv_usec >=US_PER_S); - } - /* - * Call syslog directly as was are called from the logging functions. - */ - if (fixed) - (void)syslog(LOG_ERR, "gettimeofday returned bad tv_usec: corrected"); -} -#endif - void isc_interval_set(isc_interval_t *i, unsigned int seconds, unsigned int nanoseconds) @@ -141,76 +110,52 @@ isc_time_isepoch(const isc_time_t *t) { isc_result_t isc_time_now(isc_time_t *t) { - struct timeval tv; + struct timespec ts; char strbuf[ISC_STRERRORSIZE]; REQUIRE(t != NULL); - if (gettimeofday(&tv, NULL) == -1) { + if (clock_gettime(CLOCKSOURCE, &ts) == -1) { strerror_r(errno, strbuf, sizeof(strbuf)); UNEXPECTED_ERROR(__FILE__, __LINE__, "%s", strbuf); return (ISC_R_UNEXPECTED); } - /* - * Does POSIX guarantee the signedness of tv_sec and tv_usec? If not, - * then this test will generate warnings for platforms on which it is - * unsigned. In any event, the chances of any of these problems - * happening are pretty much zero, but since the libisc library ensures - * certain things to be true ... 
- */ -#if ISC_FIX_TV_USEC - fix_tv_usec(&tv); - if (tv.tv_sec < 0) + if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= NS_PER_S) { return (ISC_R_UNEXPECTED); -#else - if (tv.tv_sec < 0 || tv.tv_usec < 0 || tv.tv_usec >= US_PER_S) - return (ISC_R_UNEXPECTED); -#endif + } /* * Ensure the tv_sec value fits in t->seconds. */ - if (sizeof(tv.tv_sec) > sizeof(t->seconds) && - ((tv.tv_sec | (unsigned int)-1) ^ (unsigned int)-1) != 0U) + if (sizeof(ts.tv_sec) > sizeof(t->seconds) && + ((ts.tv_sec | (unsigned int)-1) ^ (unsigned int)-1) != 0U) return (ISC_R_RANGE); - t->seconds = tv.tv_sec; - t->nanoseconds = tv.tv_usec * NS_PER_US; + t->seconds = ts.tv_sec; + t->nanoseconds = ts.tv_nsec; return (ISC_R_SUCCESS); } isc_result_t isc_time_nowplusinterval(isc_time_t *t, const isc_interval_t *i) { - struct timeval tv; + struct timespec ts; char strbuf[ISC_STRERRORSIZE]; REQUIRE(t != NULL); REQUIRE(i != NULL); INSIST(i->nanoseconds < NS_PER_S); - if (gettimeofday(&tv, NULL) == -1) { + if (clock_gettime(CLOCKSOURCE, &ts) == -1) { strerror_r(errno, strbuf, sizeof(strbuf)); UNEXPECTED_ERROR(__FILE__, __LINE__, "%s", strbuf); return (ISC_R_UNEXPECTED); } - /* - * Does POSIX guarantee the signedness of tv_sec and tv_usec? If not, - * then this test will generate warnings for platforms on which it is - * unsigned. In any event, the chances of any of these problems - * happening are pretty much zero, but since the libisc library ensures - * certain things to be true ... 
- */ -#if ISC_FIX_TV_USEC - fix_tv_usec(&tv); - if (tv.tv_sec < 0) + if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= NS_PER_S) { return (ISC_R_UNEXPECTED); -#else - if (tv.tv_sec < 0 || tv.tv_usec < 0 || tv.tv_usec >= US_PER_S) - return (ISC_R_UNEXPECTED); -#endif + } /* * Ensure the resulting seconds value fits in the size of an @@ -218,12 +163,12 @@ isc_time_nowplusinterval(isc_time_t *t, const isc_interval_t *i) { * note that even if both values == INT_MAX, then when added * and getting another 1 added below the result is UINT_MAX.) */ - if ((tv.tv_sec > INT_MAX || i->seconds > INT_MAX) && - ((long long)tv.tv_sec + i->seconds > UINT_MAX)) + if ((ts.tv_sec > INT_MAX || i->seconds > INT_MAX) && + ((long long)ts.tv_sec + i->seconds > UINT_MAX)) return (ISC_R_RANGE); - t->seconds = tv.tv_sec + i->seconds; - t->nanoseconds = tv.tv_usec * NS_PER_US + i->nanoseconds; + t->seconds = ts.tv_sec + i->seconds; + t->nanoseconds = ts.tv_nsec + i->nanoseconds; if (t->nanoseconds >= NS_PER_S) { t->seconds++; t->nanoseconds -= NS_PER_S; diff --git a/lib/isc/win32/include/isc/align.h b/lib/isc/win32/include/isc/align.h new file mode 100644 index 0000000000..d5b02a32b6 --- /dev/null +++ b/lib/isc/win32/include/isc/align.h @@ -0,0 +1,13 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. 
+ */ + +#pragma once +#define alignas(x) __declspec(align(x)) diff --git a/lib/isc/win32/include/isc/stdatomic.h b/lib/isc/win32/include/isc/stdatomic.h index b14d935d5b..60ac199af2 100644 --- a/lib/isc/win32/include/isc/stdatomic.h +++ b/lib/isc/win32/include/isc/stdatomic.h @@ -70,6 +70,7 @@ typedef int_fast32_t volatile atomic_int_fast32_t; typedef uint_fast32_t volatile atomic_uint_fast32_t; typedef int_fast64_t volatile atomic_int_fast64_t; typedef uint_fast64_t volatile atomic_uint_fast64_t; +typedef uintptr_t volatile atomic_uintptr_t; #define atomic_init(obj, desired) \ (*(obj) = (desired)) @@ -396,8 +397,7 @@ atomic_compare_exchange_strong_explicit64(atomic_int_fast64_t *obj, return (__r); } -static inline -bool +static inline bool atomic_compare_exchange_abort() { INSIST(0); ISC_UNREACHABLE(); @@ -419,8 +419,7 @@ atomic_compare_exchange_abort() { succ, fail) \ : atomic_compare_exchange_abort()))) -#define atomic_compare_exchange_strong(obj, expected, desired, \ - succ, fail) \ +#define atomic_compare_exchange_strong(obj, expected, desired) \ atomic_compare_exchange_strong_explicit(obj, expected, desired, \ memory_order_seq_cst, \ memory_order_seq_cst) @@ -434,3 +433,23 @@ atomic_compare_exchange_abort() { atomic_compare_exchange_weak_explicit(obj, expected, desired, \ memory_order_seq_cst, \ memory_order_seq_cst) + +static inline +bool +atomic_exchange_abort() { + INSIST(0); + ISC_UNREACHABLE(); +} + + +#define atomic_exchange_explicit(obj, desired, order) \ + (sizeof(*(obj)) == 8 \ + ? InterlockedExchange64(obj, desired) \ + : (sizeof(*(obj)) == 4 \ + ? InterlockedExchange(obj, desired) \ + : (sizeof(*(obj)) == 1 \ + ? 
InterlockedExchange8(obj, desired) \ + : atomic_exchange_abort()))) + +#define atomic_exchange(obj, desired) \ + atomic_exchange_explicit(obj, desired, memory_order_seq_cst) diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in index 182a021705..9b3a45d45e 100644 --- a/lib/isc/win32/libisc.def.in +++ b/lib/isc/win32/libisc.def.in @@ -24,6 +24,10 @@ isc_app_start isc_app_unblock isc_appctx_create isc_appctx_destroy +isc_astack_destroy +isc_astack_new +isc_astack_pop +isc_astack_trypush isc__buffer_activeregion isc__buffer_add isc__buffer_availableregion @@ -242,6 +246,13 @@ isc_heap_insert isc_hex_decodestring isc_hex_tobuffer isc_hex_totext +isc_hp_clear +isc_hp_destroy +isc_hp_protect +isc_hp_protect_ptr +isc_hp_protect_release +isc_hp_new +isc_hp_retire isc_hmac isc_hmac_new isc_hmac_free @@ -252,6 +263,13 @@ isc_hmac_final isc_hmac_get_md_type isc_hmac_get_size isc_hmac_get_block_size +isc_hp_new +isc_hp_destroy +isc_hp_clear +isc_hp_protect +isc_hp_protect_ptr +isc_hp_protect_release +isc_hp_retire isc_ht_add isc_ht_count isc_ht_delete @@ -417,6 +435,29 @@ isc_netaddr_setzone isc_netaddr_totext isc_netaddr_unspec isc_netscope_pton +isc_nmhandle_getdata +isc_nmhandle_getextra +isc_nmhandle_is_stream +isc_nmhandle_localaddr +isc_nmhandle_peeraddr +isc_nmhandle_ref +isc_nmhandle_setdata +isc_nmhandle_unref +isc_nm_destroy +isc_nm_detach +isc_nm_listentcpdns +isc_nm_listenudp +isc_nm_maxudp +isc_nm_send +isc_nm_start +isc_nmsocket_detach +isc_nm_tcpdns_sequential +isc_nm_tcpdns_stoplistening +isc_nm_tid +isc_nm_udp_stoplistening +isc__nm_acquire_interlocked +isc__nm_drop_interlocked +isc__nm_acquire_interlocked_force isc_nonce_buf isc_ntpaths_get isc_ntpaths_init @@ -438,6 +479,10 @@ isc_portset_isset isc_portset_nports isc_portset_remove isc_portset_removerange +isc_queue_enqueue +isc_queue_dequeue +isc_queue_destroy +isc_queue_new isc_quota_attach isc_quota_destroy isc_quota_detach @@ -507,6 +552,7 @@ isc_sockaddr_fromin isc_sockaddr_fromin6
isc_sockaddr_fromnetaddr isc_sockaddr_frompath +isc_sockaddr_fromsockaddr isc_sockaddr_getport isc_sockaddr_hash isc_sockaddr_isexperimental @@ -565,6 +611,7 @@ isc_task_exiting isc_task_getcurrenttime isc_task_getcurrenttimex isc_task_onshutdown +isc_task_pause isc_task_privilege isc_task_purge isc_task_purgeevent @@ -576,6 +623,7 @@ isc_task_sendtoanddetach isc_task_setname isc_task_setprivilege isc_task_shutdown +isc_task_unpause isc_task_unsend isc_taskmgr_create isc_taskmgr_createinctx diff --git a/lib/isc/win32/libisc.vcxproj.filters.in b/lib/isc/win32/libisc.vcxproj.filters.in index 22591f44db..188f0dbdb4 100644 --- a/lib/isc/win32/libisc.vcxproj.filters.in +++ b/lib/isc/win32/libisc.vcxproj.filters.in @@ -35,6 +35,9 @@ Library Header Files + + Library Header Files + Library Header Files @@ -104,6 +107,9 @@ Library Header Files + + Library Header Files + Library Header Files @@ -449,6 +455,9 @@ Library Source Files + + Library Source Files + Library Source Files @@ -500,6 +509,9 @@ Library Source Files + + Library Source Files + Library Source Files @@ -548,6 +560,9 @@ Library Source Files + + Library Source Files + Library Source Files diff --git a/lib/isc/win32/libisc.vcxproj.in b/lib/isc/win32/libisc.vcxproj.in index 885d83c64e..798129677d 100644 --- a/lib/isc/win32/libisc.vcxproj.in +++ b/lib/isc/win32/libisc.vcxproj.in @@ -59,11 +59,11 @@ @IF PKCS11 BIND9;@PK11_LIB_LOCATION@WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBISC_EXPORTS;%(PreprocessorDefinitions);%(PreprocessorDefinitions) ..\..\..\config.h - .\;..\..\..\;@LIBXML2_INC@@OPENSSL_INC@@ZLIB_INC@include;..\include;win32;..\..\isccfg\include;..\..\dns\win32\include;..\..\dns\include;%(AdditionalIncludeDirectories) + .\;..\..\..\;@LIBXML2_INC@@LIBUV_INC@@OPENSSL_INC@@ZLIB_INC@include;..\include;win32;..\..\isccfg\include;..\..\dns\win32\include;..\..\dns\include;%(AdditionalIncludeDirectories) @ELSE PKCS11 BIND9;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBISC_EXPORTS;%(PreprocessorDefinitions);%(PreprocessorDefinitions) 
..\..\..\config.h - .\;..\..\..\;@LIBXML2_INC@@OPENSSL_INC@@ZLIB_INC@include;..\include;win32;..\..\isccfg\include;%(AdditionalIncludeDirectories) + .\;..\..\..\;@LIBXML2_INC@@LIBUV_INC@@OPENSSL_INC@@ZLIB_INC@include;..\include;win32;..\..\isccfg\include;%(AdditionalIncludeDirectories) @END PKCS11 true .\$(Configuration)\$(TargetName).pch @@ -77,7 +77,7 @@ Console true ..\..\..\Build\$(Configuration)\$(TargetName)$(TargetExt) - @OPENSSL_LIB@@LIBXML2_LIB@@ZLIB_LIB@ws2_32.lib;%(AdditionalDependencies) + @OPENSSL_LIB@@LIBUV_LIB@@LIBXML2_LIB@@ZLIB_LIB@ws2_32.lib;%(AdditionalDependencies) $(ProjectName).def .\$(Configuration)\$(ProjectName).lib @@ -96,6 +96,9 @@ echo Copying the OpenSSL DLL and LICENSE. copy @OPENSSL_DLL@ ..\Build\Debug\ copy @OPENSSL_PATH@\LICENSE ..\Build\Debug\OpenSSL-LICENSE +echo Copying libuv DLL. +copy @LIBUV_DLL@ ..\Build\Debug\ + @IF LIBXML2 echo Copying the libxml DLL. @@ -148,11 +151,11 @@ copy InstallFiles ..\Build\Debug\ @IF PKCS11 BIND9;@PK11_LIB_LOCATION@WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBISC_EXPORTS;%(PreprocessorDefinitions);%(PreprocessorDefinitions) ..\..\..\config.h - .\;..\..\..\;@LIBXML2_INC@@OPENSSL_INC@@ZLIB_INC@include;..\include;win32;..\..\isccfg\include;..\..\dns\win32\include;..\..\dns\include;%(AdditionalIncludeDirectories) + .\;..\..\..\;@LIBXML2_INC@@LIBUV_INC@@OPENSSL_INC@@ZLIB_INC@include;..\include;win32;..\..\isccfg\include;..\..\dns\win32\include;..\..\dns\include;%(AdditionalIncludeDirectories) @ELSE PKCS11 BIND9;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBISC_EXPORTS;%(PreprocessorDefinitions);%(PreprocessorDefinitions) ..\..\..\config.h - .\;..\..\..\;@LIBXML2_INC@@OPENSSL_INC@@ZLIB_INC@include;..\include;win32;..\..\isccfg\include;%(AdditionalIncludeDirectories) + .\;..\..\..\;@LIBXML2_INC@@LIBUV_INC@@OPENSSL_INC@@ZLIB_INC@include;..\include;win32;..\..\isccfg\include;%(AdditionalIncludeDirectories) @END PKCS11 OnlyExplicitInline false @@ -169,7 +172,7 @@ copy InstallFiles ..\Build\Debug\ true true 
..\..\..\Build\$(Configuration)\$(TargetName)$(TargetExt) - @OPENSSL_LIB@@LIBXML2_LIB@@ZLIB_LIB@ws2_32.lib;%(AdditionalDependencies) + @OPENSSL_LIB@@LIBUV_LIB@@LIBXML2_LIB@@ZLIB_LIB@ws2_32.lib;%(AdditionalDependencies) $(ProjectName).def .\$(Configuration)\$(ProjectName).lib Default @@ -239,6 +242,9 @@ echo Copying the OpenSSL DLL and LICENSE. copy @OPENSSL_DLL@ ..\Build\Release\ copy @OPENSSL_PATH@\LICENSE ..\Build\Release\OpenSSL-LICENSE +echo Copying libuv DLL. +copy @LIBUV_DLL@ ..\Build\Release\ + @IF LIBXML2 echo Copying the libxml DLL. @@ -288,6 +294,7 @@ copy InstallFiles ..\Build\Release\ + @@ -311,6 +318,7 @@ copy InstallFiles ..\Build\Release\ + @@ -404,6 +412,7 @@ copy InstallFiles ..\Build\Release\ + @@ -421,6 +430,7 @@ copy InstallFiles ..\Build\Release\ + @@ -432,12 +442,18 @@ copy InstallFiles ..\Build\Release\ + + + + + + diff --git a/lib/ns/client.c b/lib/ns/client.c index 598e41179e..69400141a1 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -54,6 +53,7 @@ #include #include +#include #include #include #include @@ -97,20 +97,6 @@ #define TCP_CLIENT(c) (((c)->attributes & NS_CLIENTATTR_TCP) != 0) -#define TCP_BUFFER_SIZE (65535 + 2) -#define SEND_BUFFER_SIZE 4096 -#define RECV_BUFFER_SIZE 4096 - -#define NMCTXS 100 -/*%< - * Number of 'mctx pools' for clients. (Should this be configurable?) - * When enabling threads, we use a pool of memory contexts shared by - * client objects, since concurrent access to a shared context would cause - * heavy contentions. The above constant is expected to be enough for - * completely avoiding contentions among threads for an authoritative-only - * server.
- */ - #define COOKIE_SIZE 24U /* 8 + 4 + 4 + 8 */ #define ECS_SIZE 20U /* 2 + 1 + 1 + [0..16] */ @@ -119,108 +105,9 @@ #define WANTPAD(x) (((x)->attributes & NS_CLIENTATTR_WANTPAD) != 0) #define USEKEEPALIVE(x) (((x)->attributes & NS_CLIENTATTR_USEKEEPALIVE) != 0) -/*% nameserver client manager structure */ -struct ns_clientmgr { - /* Unlocked. */ - unsigned int magic; - - /* The queue object has its own locks */ - client_queue_t inactive; /*%< To be recycled */ - - isc_mem_t * mctx; - ns_server_t * sctx; - isc_taskmgr_t * taskmgr; - isc_timermgr_t * timermgr; - isc_task_t * excl; - - /* Lock covers manager state. */ - isc_mutex_t lock; - bool exiting; - - /* Lock covers the clients list */ - isc_mutex_t listlock; - client_list_t clients; /*%< All active clients */ - - /* Lock covers the recursing list */ - isc_mutex_t reclock; - client_list_t recursing; /*%< Recursing clients */ - -#if NMCTXS > 0 - /*%< mctx pool for clients. */ - unsigned int nextmctx; - isc_mem_t * mctxpool[NMCTXS]; -#endif -}; - #define MANAGER_MAGIC ISC_MAGIC('N', 'S', 'C', 'm') #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, MANAGER_MAGIC) -/*! - * Client object states. Ordering is significant: higher-numbered - * states are generally "more active", meaning that the client can - * have more dynamically allocated data, outstanding events, etc. - * In the list below, any such properties listed for state N - * also apply to any state > N. - * - * To force the client into a less active state, set client->newstate - * to that state and call exit_check(). This will cause any - * activities defined for higher-numbered states to be aborted. - */ - -#define NS_CLIENTSTATE_FREED 0 -/*%< - * The client object no longer exists. - */ - -#define NS_CLIENTSTATE_INACTIVE 1 -/*%< - * The client object exists and has a task and timer. - * Its "query" struct and sendbuf are initialized. - * It is on the client manager's list of inactive clients. - * It has a message and OPT, both in the reset state. 
- */ - -#define NS_CLIENTSTATE_READY 2 -/*%< - * The client object is either a TCP or a UDP one, and - * it is associated with a network interface. It is on the - * client manager's list of active clients. - * - * If it is a TCP client object, it has a TCP listener socket - * and an outstanding TCP listen request. - * - * If it is a UDP client object, it has a UDP listener socket - * and an outstanding UDP receive request. - */ - -#define NS_CLIENTSTATE_READING 3 -/*%< - * The client object is a TCP client object that has received - * a connection. It has a tcpsocket, tcpmsg, TCP quota, and an - * outstanding TCP read request. This state is not used for - * UDP client objects. - */ - -#define NS_CLIENTSTATE_WORKING 4 -/*%< - * The client object has received a request and is working - * on it. It has a view, and it may have any of a non-reset OPT, - * recursion quota, and an outstanding write request. - */ - -#define NS_CLIENTSTATE_RECURSING 5 -/*%< - * The client object is recursing. It will be on the 'recursing' - * list. - */ - -#define NS_CLIENTSTATE_MAX 9 -/*%< - * Sentinel value used to indicate "no state". When client->newstate - * has this value, we are not attempting to exit the current state. - * Must be greater than any valid state. - */ - /* * Enable ns_client_dropport() by default. 
*/ @@ -230,22 +117,16 @@ struct ns_clientmgr { LIBNS_EXTERNAL_DATA unsigned int ns_client_requests; -static void read_settimeout(ns_client_t *client, bool newconn); -static void client_read(ns_client_t *client, bool newconn); -static void client_accept(ns_client_t *client); -static void client_udprecv(ns_client_t *client); +static void clientmgr_attach(ns_clientmgr_t *source, ns_clientmgr_t **targetp); +static void clientmgr_detach(ns_clientmgr_t **mp); static void clientmgr_destroy(ns_clientmgr_t *manager); -static bool exit_check(ns_client_t *client); static void ns_client_endrequest(ns_client_t *client); -static void client_start(isc_task_t *task, isc_event_t *event); static void ns_client_dumpmessage(ns_client_t *client, const char *reason); -static isc_result_t get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, - dns_dispatch_t *disp, bool tcp); -static isc_result_t get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, - isc_socket_t *sock, ns_client_t *oldclient); static void compute_cookie(ns_client_t *client, uint32_t when, uint32_t nonce, const unsigned char *secret, isc_buffer_t *buf); +static void +get_clientmctx(ns_clientmgr_t *manager, isc_mem_t **mctxp); void ns_client_recursing(ns_client_t *client) { @@ -253,7 +134,7 @@ ns_client_recursing(ns_client_t *client) { REQUIRE(client->state == NS_CLIENTSTATE_WORKING); LOCK(&client->manager->reclock); - client->newstate = client->state = NS_CLIENTSTATE_RECURSING; + client->state = NS_CLIENTSTATE_RECURSING; ISC_LIST_APPEND(client->manager->recursing, client, rlink); UNLOCK(&client->manager->reclock); } @@ -275,596 +156,9 @@ ns_client_killoldestquery(ns_client_t *client) { void ns_client_settimeout(ns_client_t *client, unsigned int seconds) { - isc_result_t result; - isc_interval_t interval; - - isc_interval_set(&interval, seconds, 0); - result = isc_timer_reset(client->timer, isc_timertype_once, NULL, - &interval, false); - client->timerset = true; - if (result != ISC_R_SUCCESS) { - 
ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_ERROR, - "setting timeout: %s", - isc_result_totext(result)); - /* Continue anyway. */ - } -} - -static void -read_settimeout(ns_client_t *client, bool newconn) { - isc_result_t result; - isc_interval_t interval; - unsigned int ds; - - if (newconn) - ds = client->sctx->initialtimo; - else if (USEKEEPALIVE(client)) - ds = client->sctx->keepalivetimo; - else - ds = client->sctx->idletimo; - - isc_interval_set(&interval, ds / 10, 100000000 * (ds % 10)); - result = isc_timer_reset(client->timer, isc_timertype_once, NULL, - &interval, false); - client->timerset = true; - if (result != ISC_R_SUCCESS) { - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_ERROR, - "setting timeout: %s", - isc_result_totext(result)); - /* Continue anyway. */ - } -} - -/*% - * Allocate a reference-counted object that will maintain a single pointer to - * the (also reference-counted) TCP client quota, shared between all the - * clients processing queries on a single TCP connection, so that all - * clients sharing the one socket will together consume only one slot in - * the 'tcp-clients' quota. - */ -static isc_result_t -tcpconn_init(ns_client_t *client, bool force) { - isc_result_t result; - isc_quota_t *quota = NULL; - ns_tcpconn_t *tconn = NULL; - - REQUIRE(client->tcpconn == NULL); - - /* - * Try to attach to the quota first, so we won't pointlessly - * allocate memory for a tcpconn object if we can't get one. - */ - if (force) { - result = isc_quota_force(&client->sctx->tcpquota, "a); - } else { - result = isc_quota_attach(&client->sctx->tcpquota, "a); - } - if (result != ISC_R_SUCCESS) { - return (result); - } - - /* - * A global memory context is used for the allocation as different - * client structures may have different memory contexts assigned and a - * reference counter allocated here might need to be freed by a - * different client. 
The performance impact caused by memory context - * contention here is expected to be negligible, given that this code - * is only executed for TCP connections. - */ - tconn = isc_mem_allocate(client->sctx->mctx, sizeof(*tconn)); - - isc_refcount_init(&tconn->refs, 1); - tconn->tcpquota = quota; - quota = NULL; - tconn->pipelined = false; - - client->tcpconn = tconn; - - return (ISC_R_SUCCESS); -} - -/*% - * Increase the count of client structures sharing the TCP connection - * that 'source' is associated with; add a pointer to the same tcpconn - * to 'target', thus associating it with the same TCP connection. - */ -static void -tcpconn_attach(ns_client_t *source, ns_client_t *target) { - int old_refs; - - REQUIRE(source->tcpconn != NULL); - REQUIRE(target->tcpconn == NULL); - REQUIRE(source->tcpconn->pipelined); - - old_refs = isc_refcount_increment(&source->tcpconn->refs); - INSIST(old_refs > 0); - target->tcpconn = source->tcpconn; -} - -/*% - * Decrease the count of client structures sharing the TCP connection that - * 'client' is associated with. If this is the last client using this TCP - * connection, we detach from the TCP quota and free the tcpconn - * object. Either way, client->tcpconn is set to NULL. - */ -static void -tcpconn_detach(ns_client_t *client) { - ns_tcpconn_t *tconn = NULL; - int old_refs; - - REQUIRE(client->tcpconn != NULL); - - tconn = client->tcpconn; - client->tcpconn = NULL; - - old_refs = isc_refcount_decrement(&tconn->refs); - INSIST(old_refs > 0); - - if (old_refs == 1) { - isc_quota_detach(&tconn->tcpquota); - isc_mem_free(client->sctx->mctx, tconn); - } -} - -/*% - * Mark a client as active and increment the interface's 'ntcpactive' - * counter, as a signal that there is at least one client servicing - * TCP queries for the interface. If we reach the TCP client quota at - * some point, this will be used to determine whether a quota overrun - * should be permitted. 
- * - * Marking the client active with the 'tcpactive' flag ensures proper - * accounting, by preventing us from incrementing or decrementing - * 'ntcpactive' more than once per client. - */ -static void -mark_tcp_active(ns_client_t *client, bool active) { - if (active && !client->tcpactive) { - isc_refcount_increment0(&client->interface->ntcpactive); - client->tcpactive = active; - } else if (!active && client->tcpactive) { - uint32_t old = - isc_refcount_decrement(&client->interface->ntcpactive); - INSIST(old > 0); - client->tcpactive = active; - } -} - -/*% - * Check for a deactivation or shutdown request and take appropriate - * action. Returns true if either is in progress; in this case - * the caller must no longer use the client object as it may have been - * freed. - */ -static bool -exit_check(ns_client_t *client) { - bool destroy_manager = false; - ns_clientmgr_t *manager = NULL; - - REQUIRE(NS_CLIENT_VALID(client)); - manager = client->manager; - - if (client->state <= client->newstate) - return (false); /* Business as usual. */ - - INSIST(client->newstate < NS_CLIENTSTATE_RECURSING); - - /* - * We need to detach from the view early when shutting down - * the server to break the following vicious circle: - * - * - The resolver will not shut down until the view refcount is zero - * - The view refcount does not go to zero until all clients detach - * - The client does not detach from the view until references is zero - * - references does not go to zero until the resolver has shut down - * - * Keep the view attached until any outstanding updates complete. - */ - if (client->nupdates == 0 && - client->newstate == NS_CLIENTSTATE_FREED && client->view != NULL) - dns_view_detach(&client->view); - - if (client->state == NS_CLIENTSTATE_WORKING || - client->state == NS_CLIENTSTATE_RECURSING) - { - INSIST(client->newstate <= NS_CLIENTSTATE_READING); - /* - * Let the update processing complete. 
- */ - if (client->nupdates > 0) - return (true); - - /* - * We are trying to abort request processing. - */ - if (client->nsends > 0) { - isc_socket_t *sock; - if (TCP_CLIENT(client)) - sock = client->tcpsocket; - else - sock = client->udpsocket; - isc_socket_cancel(sock, client->task, - ISC_SOCKCANCEL_SEND); - } - - if (! (client->nsends == 0 && client->nrecvs == 0 && - isc_refcount_current(&client->references) == 0)) - { - /* - * Still waiting for I/O cancel completion. - * or lingering references. - */ - return (true); - } - - /* - * I/O cancel is complete. Burn down all state - * related to the current request. Ensure that - * the client is no longer on the recursing list. - * - * We need to check whether the client is still linked, - * because it may already have been removed from the - * recursing list by ns_client_killoldestquery() - */ - if (client->state == NS_CLIENTSTATE_RECURSING) { - LOCK(&manager->reclock); - if (ISC_LINK_LINKED(client, rlink)) - ISC_LIST_UNLINK(manager->recursing, - client, rlink); - UNLOCK(&manager->reclock); - } - ns_client_endrequest(client); - - client->state = NS_CLIENTSTATE_READING; - INSIST(client->recursionquota == NULL); - - if (NS_CLIENTSTATE_READING == client->newstate) { - INSIST(client->tcpconn != NULL); - if (!client->tcpconn->pipelined) { - client_read(client, false); - client->newstate = NS_CLIENTSTATE_MAX; - return (true); /* We're done. */ - } else if (client->mortal) { - client->newstate = NS_CLIENTSTATE_INACTIVE; - } else - return (false); - } - } - - if (client->state == NS_CLIENTSTATE_READING) { - /* - * We are trying to abort the current TCP connection, - * if any. - */ - INSIST(client->recursionquota == NULL); - INSIST(client->newstate <= NS_CLIENTSTATE_READY); - - if (client->nreads > 0) { - dns_tcpmsg_cancelread(&client->tcpmsg); - /* Still waiting for read cancel completion? 
*/ - if (client->nreads > 0) { - return (true); - } - } - - if (client->tcpmsg_valid) { - dns_tcpmsg_invalidate(&client->tcpmsg); - client->tcpmsg_valid = false; - } - - /* - * Soon the client will be ready to accept a new TCP - * connection or UDP request, but we may have enough - * clients doing that already. Check whether this client - * needs to remain active and allow it go inactive if - * not. - * - * UDP clients always go inactive at this point, but a TCP - * client may need to stay active and return to READY - * state if no other clients are available to listen - * for TCP requests on this interface. - * - * Regardless, if we're going to FREED state, that means - * the system is shutting down and we don't need to - * retain clients. - */ - if (client->mortal && TCP_CLIENT(client) && - client->newstate != NS_CLIENTSTATE_FREED && - (client->sctx->options & NS_SERVER_CLIENTTEST) == 0 && - isc_refcount_current(&client->interface->ntcpaccepting) == 0) - { - /* Nobody else is accepting */ - client->mortal = false; - client->newstate = NS_CLIENTSTATE_READY; - } - - /* - * Detach from TCP connection and TCP client quota, - * if appropriate. If this is the last reference to - * the TCP connection in our pipeline group, the - * TCP quota slot will be released. - */ - if (client->tcpconn) { - tcpconn_detach(client); - } - - if (client->tcpsocket != NULL) { - CTRACE("closetcp"); - isc_socket_detach(&client->tcpsocket); - mark_tcp_active(client, false); - } - - if (client->timerset) { - (void)isc_timer_reset(client->timer, - isc_timertype_inactive, - NULL, NULL, true); - client->timerset = false; - } - - client->peeraddr_valid = false; - - client->state = NS_CLIENTSTATE_READY; - - /* - * We don't need the client; send it to the inactive - * queue for recycling. 
- */ - if (client->mortal) { - if (client->newstate > NS_CLIENTSTATE_INACTIVE) { - client->newstate = NS_CLIENTSTATE_INACTIVE; - } - } - - if (NS_CLIENTSTATE_READY == client->newstate) { - if (TCP_CLIENT(client)) { - client_accept(client); - } else { - client_udprecv(client); - } - client->newstate = NS_CLIENTSTATE_MAX; - return (true); - } - } - - if (client->state == NS_CLIENTSTATE_READY) { - INSIST(client->newstate <= NS_CLIENTSTATE_INACTIVE); - - /* - * We are trying to enter the inactive state. - */ - if (client->naccepts > 0) { - isc_socket_cancel(client->tcplistener, client->task, - ISC_SOCKCANCEL_ACCEPT); - /* Still waiting for accept cancel completion? */ - if (client->naccepts > 0) { - return (true); - } - } - - /* Accept cancel is complete. */ - if (client->nrecvs > 0) { - isc_socket_cancel(client->udpsocket, client->task, - ISC_SOCKCANCEL_RECV); - /* Still waiting for recv cancel completion? */ - if (client->nrecvs > 0) { - return (true); - } - } - - /* Still waiting for control event to be delivered */ - if (client->nctls > 0) { - return (true); - } - - INSIST(client->naccepts == 0); - INSIST(client->recursionquota == NULL); - if (client->tcplistener != NULL) { - isc_socket_detach(&client->tcplistener); - mark_tcp_active(client, false); - } - if (client->udpsocket != NULL) { - isc_socket_detach(&client->udpsocket); - } - - /* Deactivate the client. */ - if (client->interface != NULL) { - ns_interface_detach(&client->interface); - } - - if (client->dispatch != NULL) { - dns_dispatch_detach(&client->dispatch); - } - - client->attributes = 0; - client->mortal = false; - client->sendcb = NULL; - - if (client->keytag != NULL) { - isc_mem_put(client->mctx, client->keytag, - client->keytag_len); - client->keytag_len = 0; - } - - /* - * Put the client on the inactive list. 
If we are aiming for - * the "freed" state, it will be removed from the inactive - * list shortly, and we need to keep the manager locked until - * that has been done, lest the manager decide to reactivate - * the dying client inbetween. - */ - client->state = NS_CLIENTSTATE_INACTIVE; - INSIST(client->recursionquota == NULL); - - if (client->state == client->newstate) { - client->newstate = NS_CLIENTSTATE_MAX; - if ((client->sctx->options & - NS_SERVER_CLIENTTEST) == 0 && - manager != NULL && !manager->exiting) - { - ISC_QUEUE_PUSH(manager->inactive, client, - ilink); - } - if (client->needshutdown) { - isc_task_shutdown(client->task); - } - return (true); - } - } - - if (client->state == NS_CLIENTSTATE_INACTIVE) { - INSIST(client->newstate == NS_CLIENTSTATE_FREED); - /* - * We are trying to free the client. - * - * When "shuttingdown" is true, either the task has received - * its shutdown event or no shutdown event has ever been - * set up. Thus, we have no outstanding shutdown - * event at this point. 
- */ - REQUIRE(client->state == NS_CLIENTSTATE_INACTIVE); - - INSIST(client->recursionquota == NULL); - INSIST(!ISC_QLINK_LINKED(client, ilink)); - - if (manager != NULL) { - LOCK(&manager->listlock); - ISC_LIST_UNLINK(manager->clients, client, link); - LOCK(&manager->lock); - if (manager->exiting && - ISC_LIST_EMPTY(manager->clients)) - destroy_manager = true; - UNLOCK(&manager->lock); - UNLOCK(&manager->listlock); - } - - ns_query_free(client); - isc_mem_put(client->mctx, client->recvbuf, RECV_BUFFER_SIZE); - isc_event_free((isc_event_t **)&client->sendevent); - isc_event_free((isc_event_t **)&client->recvevent); - isc_timer_detach(&client->timer); - if (client->delaytimer != NULL) - isc_timer_detach(&client->delaytimer); - - if (client->tcpbuf != NULL) - isc_mem_put(client->mctx, client->tcpbuf, - TCP_BUFFER_SIZE); - if (client->opt != NULL) { - INSIST(dns_rdataset_isassociated(client->opt)); - dns_rdataset_disassociate(client->opt); - dns_message_puttemprdataset(client->message, - &client->opt); - } - if (client->keytag != NULL) { - isc_mem_put(client->mctx, client->keytag, - client->keytag_len); - client->keytag_len = 0; - } - - dns_message_destroy(&client->message); - - /* - * Detaching the task must be done after unlinking from - * the manager's lists because the manager accesses - * client->task. - */ - if (client->task != NULL) - isc_task_detach(&client->task); - - CTRACE("free"); - client->magic = 0; - - /* - * Check that there are no other external references to - * the memory context. - */ - if ((client->sctx->options & NS_SERVER_CLIENTTEST) != 0 && - isc_mem_references(client->mctx) != 1) - { - isc_mem_stats(client->mctx, stderr); - INSIST(0); - ISC_UNREACHABLE(); - } - - /* - * Destroy the fetchlock mutex that was created in - * ns_query_init(). 
- */ - isc_mutex_destroy(&client->query.fetchlock); - - if (client->sctx != NULL) - ns_server_detach(&client->sctx); - - isc_mem_putanddetach(&client->mctx, client, sizeof(*client)); - } - - if (destroy_manager && manager != NULL) - clientmgr_destroy(manager); - - return (true); -} - -/*% - * The client's task has received the client's control event - * as part of the startup process. - */ -static void -client_start(isc_task_t *task, isc_event_t *event) { - ns_client_t *client = (ns_client_t *) event->ev_arg; - - INSIST(task == client->task); - - UNUSED(task); - - INSIST(client->nctls == 1); - client->nctls--; - - if (exit_check(client)) - return; - - if (TCP_CLIENT(client)) { - if (client->tcpconn != NULL) { - client_read(client, false); - } else { - client_accept(client); - } - } else { - client_udprecv(client); - } -} - -/*% - * The client's task has received a shutdown event. - */ -static void -client_shutdown(isc_task_t *task, isc_event_t *event) { - ns_client_t *client; - - REQUIRE(event != NULL); - REQUIRE(event->ev_type == ISC_TASKEVENT_SHUTDOWN); - client = event->ev_arg; - REQUIRE(NS_CLIENT_VALID(client)); - REQUIRE(task == client->task); - - UNUSED(task); - - CTRACE("shutdown"); - - isc_event_free(&event); - - if (client->shutdown != NULL) { - (client->shutdown)(client->shutdown_arg, ISC_R_SHUTTINGDOWN); - client->shutdown = NULL; - client->shutdown_arg = NULL; - } - - if (ISC_QLINK_LINKED(client, ilink)) - ISC_QUEUE_UNLINK(client->manager->inactive, client, ilink); - - client->newstate = NS_CLIENTSTATE_FREED; - client->needshutdown = false; - (void)exit_check(client); + UNUSED(client); + UNUSED(seconds); + /* XXXWPK TODO use netmgr to set timeout */ } static void @@ -879,9 +173,15 @@ ns_client_endrequest(ns_client_t *client) { CTRACE("endrequest"); - if (client->next != NULL) { - (client->next)(client); - client->next = NULL; + LOCK(&client->manager->reclock); + if (ISC_LINK_LINKED(client, rlink)) { + ISC_LIST_UNLINK(client->manager->recursing, client, 
rlink); + } + UNLOCK(&client->manager->reclock); + + if (client->cleanup != NULL) { + (client->cleanup)(client); + client->cleanup = NULL; } if (client->view != NULL) { @@ -906,17 +206,22 @@ ns_client_endrequest(ns_client_t *client) { dns_ecs_init(&client->ecs); dns_message_reset(client->message, DNS_MESSAGE_INTENTPARSE); + /* + * Clean up from recursion - normally this would be done in + * fetch_callback(), but if we're shutting down and canceling then + * it might not have happened. + */ if (client->recursionquota != NULL) { isc_quota_detach(&client->recursionquota); ns_stats_decrement(client->sctx->nsstats, ns_statscounter_recursclients); } + /* - * Clear all client attributes that are specific to - * the request; that's all except the TCP flag. + * Clear all client attributes that are specific to the request */ - client->attributes &= NS_CLIENTATTR_TCP; + client->attributes = 0; #ifdef ENABLE_AFL if (client->sctx->fuzznotify != NULL && (client->sctx->fuzztype == isc_fuzz_client || @@ -926,73 +231,36 @@ ns_client_endrequest(ns_client_t *client) { client->sctx->fuzznotify(); } #endif /* ENABLE_AFL */ - } void -ns_client_next(ns_client_t *client, isc_result_t result) { - int newstate; - +ns_client_drop(ns_client_t *client, isc_result_t result) { REQUIRE(NS_CLIENT_VALID(client)); REQUIRE(client->state == NS_CLIENTSTATE_WORKING || - client->state == NS_CLIENTSTATE_RECURSING || - client->state == NS_CLIENTSTATE_READING); + client->state == NS_CLIENTSTATE_RECURSING); - CTRACE("next"); - - if (result != ISC_R_SUCCESS) + CTRACE("drop"); + if (result != ISC_R_SUCCESS) { ns_client_log(client, DNS_LOGCATEGORY_SECURITY, NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), "request failed: %s", isc_result_totext(result)); - - /* - * An error processing a TCP request may have left - * the connection out of sync. To be safe, we always - * sever the connection when result != ISC_R_SUCCESS. 
- */ - if (result == ISC_R_SUCCESS && TCP_CLIENT(client)) - newstate = NS_CLIENTSTATE_READING; - else - newstate = NS_CLIENTSTATE_READY; - - if (client->newstate > newstate) - client->newstate = newstate; - (void)exit_check(client); + } } - static void -client_senddone(isc_task_t *task, isc_event_t *event) { - ns_client_t *client; - isc_socketevent_t *sevent = (isc_socketevent_t *) event; +client_senddone(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + ns_client_t *client = cbarg; - REQUIRE(sevent != NULL); - REQUIRE(sevent->ev_type == ISC_SOCKEVENT_SENDDONE); - client = sevent->ev_arg; - REQUIRE(NS_CLIENT_VALID(client)); - REQUIRE(task == client->task); - REQUIRE(sevent == client->sendevent); - - UNUSED(task); + REQUIRE(client->handle == handle); CTRACE("senddone"); - - if (sevent->result != ISC_R_SUCCESS) - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_WARNING, - "error sending response: %s", - isc_result_totext(sevent->result)); - - INSIST(client->nsends > 0); - client->nsends--; - - if (client->tcpbuf != NULL) { - INSIST(TCP_CLIENT(client)); - isc_mem_put(client->mctx, client->tcpbuf, TCP_BUFFER_SIZE); - client->tcpbuf = NULL; + if (result != ISC_R_SUCCESS) { + ns_client_log(client, DNS_LOGCATEGORY_SECURITY, + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), + "send failed: %s", isc_result_totext(result)); } - ns_client_next(client, ISC_R_SUCCESS); + isc_nmhandle_unref(handle); } /*% @@ -1006,34 +274,37 @@ client_senddone(isc_task_t *task, isc_event_t *event) { static isc_result_t client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer, isc_buffer_t *tcpbuffer, uint32_t length, - unsigned char *sendbuf, unsigned char **datap) + unsigned char **datap) { unsigned char *data; uint32_t bufsize; isc_result_t result; - INSIST(datap != NULL); - INSIST((tcpbuffer == NULL && length != 0) || - (tcpbuffer != NULL && length == 0)); + REQUIRE(datap != NULL); + REQUIRE((tcpbuffer == NULL && length != 0) || + (tcpbuffer != NULL && 
length == 0)); if (TCP_CLIENT(client)) { INSIST(client->tcpbuf == NULL); - if (length + 2 > TCP_BUFFER_SIZE) { + if (length + 2 > NS_CLIENT_TCP_BUFFER_SIZE) { result = ISC_R_NOSPACE; goto done; } - client->tcpbuf = isc_mem_get(client->mctx, TCP_BUFFER_SIZE); + client->tcpbuf = isc_mem_get(client->mctx, + NS_CLIENT_TCP_BUFFER_SIZE); data = client->tcpbuf; if (tcpbuffer != NULL) { - isc_buffer_init(tcpbuffer, data, TCP_BUFFER_SIZE); - isc_buffer_init(buffer, data + 2, TCP_BUFFER_SIZE - 2); + isc_buffer_init(tcpbuffer, data, + NS_CLIENT_TCP_BUFFER_SIZE); + isc_buffer_init(buffer, data, + NS_CLIENT_TCP_BUFFER_SIZE); } else { - isc_buffer_init(buffer, data, TCP_BUFFER_SIZE); + isc_buffer_init(buffer, data, + NS_CLIENT_TCP_BUFFER_SIZE); INSIST(length <= 0xffff); - isc_buffer_putuint16(buffer, (uint16_t)length); } } else { - data = sendbuf; + data = client->sendbuf; if ((client->attributes & NS_CLIENTATTR_HAVECOOKIE) == 0) { if (client->view != NULL) bufsize = client->view->nocookieudp; @@ -1043,8 +314,8 @@ client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer, bufsize = client->udpsize; if (bufsize > client->udpsize) bufsize = client->udpsize; - if (bufsize > SEND_BUFFER_SIZE) - bufsize = SEND_BUFFER_SIZE; + if (bufsize > NS_CLIENT_SEND_BUFFER_SIZE) + bufsize = NS_CLIENT_SEND_BUFFER_SIZE; if (length > bufsize) { result = ISC_R_NOSPACE; goto done; @@ -1060,82 +331,13 @@ client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer, static isc_result_t client_sendpkg(ns_client_t *client, isc_buffer_t *buffer) { - struct in6_pktinfo *pktinfo; - isc_result_t result; isc_region_t r; - isc_sockaddr_t *address; - isc_socket_t *sock; - isc_netaddr_t netaddr; - int match; - unsigned int sockflags = ISC_SOCKFLAG_IMMEDIATE; - - if (TCP_CLIENT(client)) { - sock = client->tcpsocket; - address = NULL; - } else { - dns_aclenv_t *env = - ns_interfacemgr_getaclenv(client->interface->mgr); - - sock = client->udpsocket; - address = &client->peeraddr; - - 
isc_netaddr_fromsockaddr(&netaddr, &client->peeraddr); - if (client->sctx->blackholeacl != NULL && - (dns_acl_match(&netaddr, NULL, client->sctx->blackholeacl, - env, &match, NULL) == ISC_R_SUCCESS) && - match > 0) - { - return (DNS_R_BLACKHOLED); - } - sockflags |= ISC_SOCKFLAG_NORETRY; - } - - if ((client->attributes & NS_CLIENTATTR_PKTINFO) != 0 && - (client->attributes & NS_CLIENTATTR_MULTICAST) == 0) - pktinfo = &client->pktinfo; - else - pktinfo = NULL; - - if (client->dispatch != NULL) { - isc_dscp_t dscp = dns_dispatch_getdscp(client->dispatch); - if (dscp != -1) { - client->dscp = dscp; - } - } - - if (client->dscp == -1) { - client->sendevent->attributes &= ~ISC_SOCKEVENTATTR_DSCP; - client->sendevent->dscp = 0; - } else { - client->sendevent->attributes |= ISC_SOCKEVENTATTR_DSCP; - client->sendevent->dscp = client->dscp; - } isc_buffer_usedregion(buffer, &r); - /* - * If this is a UDP client and the IPv6 packet can't be - * encapsulated without generating a PTB on a 1500 octet - * MTU link force fragmentation at 1280 if it is a IPv6 - * response. 
- */ - client->sendevent->attributes &= ~ISC_SOCKEVENTATTR_USEMINMTU; - if (!TCP_CLIENT(client) && r.length > 1432) - client->sendevent->attributes |= ISC_SOCKEVENTATTR_USEMINMTU; + INSIST(client->handle != NULL); - CTRACE("sendto"); - - result = isc_socket_sendto2(sock, &r, client->task, - address, pktinfo, - client->sendevent, sockflags); - if (result == ISC_R_SUCCESS || result == ISC_R_INPROGRESS) { - client->nsends++; - if (result == ISC_R_SUCCESS) - client_senddone(client->task, - (isc_event_t *)client->sendevent); - result = ISC_R_SUCCESS; - } - return (result); + return (isc_nm_send(client->handle, &r, client_senddone, client)); } void @@ -1145,7 +347,6 @@ ns_client_sendraw(ns_client_t *client, dns_message_t *message) { isc_buffer_t buffer; isc_region_t r; isc_region_t *mr; - unsigned char sendbuf[SEND_BUFFER_SIZE]; REQUIRE(NS_CLIENT_VALID(client)); @@ -1157,8 +358,7 @@ ns_client_sendraw(ns_client_t *client, dns_message_t *message) { goto done; } - result = client_allocsendbuf(client, &buffer, NULL, mr->length, - sendbuf, &data); + result = client_allocsendbuf(client, &buffer, NULL, mr->length, &data); if (result != ISC_R_SUCCESS) goto done; @@ -1178,22 +378,23 @@ ns_client_sendraw(ns_client_t *client, dns_message_t *message) { done: if (client->tcpbuf != NULL) { - isc_mem_put(client->mctx, client->tcpbuf, TCP_BUFFER_SIZE); + isc_mem_put(client->mctx, client->tcpbuf, + NS_CLIENT_TCP_BUFFER_SIZE); client->tcpbuf = NULL; } - ns_client_next(client, result); + + ns_client_drop(client, result); } -static void -client_send(ns_client_t *client) { +void +ns_client_send(ns_client_t *client) { isc_result_t result; unsigned char *data; - isc_buffer_t buffer; - isc_buffer_t tcpbuffer; + isc_buffer_t buffer = { .magic = 0 }; + isc_buffer_t tcpbuffer = { .magic = 0 }; isc_region_t r; dns_compress_t cctx; bool cleanup_cctx = false; - unsigned char sendbuf[SEND_BUFFER_SIZE]; unsigned int render_opts; unsigned int preferred_glue; bool opt_included = false; @@ -1205,9 +406,18 
@@ client_send(ns_client_t *client) { isc_region_t zr; #endif /* HAVE_DNSTAP */ - REQUIRE(NS_CLIENT_VALID(client)); + /* + * XXXWPK TODO + * Delay the response according to the -T delay option + */ - env = ns_interfacemgr_getaclenv(client->interface->mgr); + REQUIRE(NS_CLIENT_VALID(client)); + /* + * We need to do it to make sure the client and handle + * won't disappear from under us with client_senddone. + */ + + env = ns_interfacemgr_getaclenv(client->manager->interface->mgr); CTRACE("send"); @@ -1247,8 +457,7 @@ client_send(ns_client_t *client) { /* * XXXRTH The following doesn't deal with TCP buffer resizing. */ - result = client_allocsendbuf(client, &buffer, &tcpbuffer, 0, - sendbuf, &data); + result = client_allocsendbuf(client, &buffer, &tcpbuffer, 0, &data); if (result != ISC_R_SUCCESS) goto done; @@ -1364,7 +573,6 @@ client_send(ns_client_t *client) { client->sendcb(&buffer); } else if (TCP_CLIENT(client)) { isc_buffer_usedregion(&buffer, &r); - isc_buffer_putuint16(&tcpbuffer, (uint16_t) r.length); isc_buffer_add(&tcpbuffer, r.length); #ifdef HAVE_DNSTAP if (client->view != NULL) { @@ -1377,6 +585,8 @@ client_send(ns_client_t *client) { /* don't count the 2-octet length header */ respsize = isc_buffer_usedlength(&tcpbuffer) - 2; + + isc_nmhandle_ref(client->handle); result = client_sendpkg(client, &tcpbuffer); switch (isc_sockaddr_pf(&client->peeraddr)) { @@ -1408,6 +618,8 @@ client_send(ns_client_t *client) { #endif /* HAVE_DNSTAP */ respsize = isc_buffer_usedlength(&buffer); + + isc_nmhandle_ref(client->handle); result = client_sendpkg(client, &buffer); switch (isc_sockaddr_pf(&client->peeraddr)) { @@ -1446,85 +658,19 @@ client_send(ns_client_t *client) { ns_stats_increment(client->sctx->nsstats, ns_statscounter_truncatedresp); - if (result == ISC_R_SUCCESS) + if (result == ISC_R_SUCCESS) { return; + } done: if (client->tcpbuf != NULL) { - isc_mem_put(client->mctx, client->tcpbuf, TCP_BUFFER_SIZE); + isc_mem_put(client->mctx, client->tcpbuf, + 
NS_CLIENT_TCP_BUFFER_SIZE); client->tcpbuf = NULL; } if (cleanup_cctx) dns_compress_invalidate(&cctx); - - ns_client_next(client, result); -} - -/* - * Completes the sending of a delayed client response. - */ -static void -client_delay(isc_task_t *task, isc_event_t *event) { - ns_client_t *client; - - REQUIRE(event != NULL); - REQUIRE(event->ev_type == ISC_TIMEREVENT_LIFE || - event->ev_type == ISC_TIMEREVENT_IDLE); - client = event->ev_arg; - REQUIRE(NS_CLIENT_VALID(client)); - REQUIRE(task == client->task); - REQUIRE(client->delaytimer != NULL); - - UNUSED(task); - - CTRACE("client_delay"); - - isc_event_free(&event); - isc_timer_detach(&client->delaytimer); - - client_send(client); - ns_client_detach(&client); -} - -void -ns_client_send(ns_client_t *client) { - /* - * Delay the response according to the -T delay option - */ - if (client->sctx->delay != 0) { - ns_client_t *dummy = NULL; - isc_result_t result; - isc_interval_t interval; - - /* - * Replace ourselves if we have not already been replaced. 
- */ - if (!client->mortal) { - result = ns_client_replace(client); - if (result != ISC_R_SUCCESS) - goto nodelay; - } - - ns_client_attach(client, &dummy); - if (client->sctx->delay >= 1000) - isc_interval_set(&interval, client->sctx->delay / 1000, - (client->sctx->delay % 1000) * 1000000); - else - isc_interval_set(&interval, 0, - client->sctx->delay * 1000000); - result = isc_timer_create(client->manager->timermgr, - isc_timertype_once, NULL, &interval, - client->task, client_delay, - client, &client->delaytimer); - if (result == ISC_R_SUCCESS) - return; - - ns_client_detach(&dummy); - } - - nodelay: - client_send(client); } #if NS_CLIENT_DROPPORT @@ -1591,7 +737,7 @@ ns_client_error(ns_client_t *client, isc_result_t result) { NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10), "dropped error (%.*s) response: suspicious port", (int)isc_buffer_usedlength(&b), buf); - ns_client_next(client, ISC_R_SUCCESS); + ns_client_drop(client, ISC_R_SUCCESS); return; } #endif @@ -1641,7 +787,7 @@ ns_client_error(ns_client_t *client, isc_result_t result) { ns_statscounter_ratedropped); ns_stats_increment(client->sctx->nsstats, ns_statscounter_dropped); - ns_client_next(client, DNS_R_DROP); + ns_client_drop(client, DNS_R_DROP); return; } } @@ -1666,7 +812,7 @@ ns_client_error(ns_client_t *client, isc_result_t result) { */ result = dns_message_reply(message, false); if (result != ISC_R_SUCCESS) { - ns_client_next(client, result); + ns_client_drop(client, result); return; } } @@ -1693,7 +839,7 @@ ns_client_error(ns_client_t *client, isc_result_t result) { NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(1), "possible error packet loop, " "FORMERR dropped"); - ns_client_next(client, result); + ns_client_drop(client, result); return; } client->formerrcache.addr = client->peeraddr; @@ -1749,7 +895,7 @@ ns_client_addopt(ns_client_t *client, dns_message_t *message, REQUIRE(opt != NULL && *opt == NULL); REQUIRE(message != NULL); - env = ns_interfacemgr_getaclenv(client->interface->mgr); + env = 
ns_interfacemgr_getaclenv(client->manager->interface->mgr); view = client->view; resolver = (view != NULL) ? view->resolver : NULL; if (resolver != NULL) @@ -2324,8 +1470,8 @@ process_opt(ns_client_t *client, dns_rdataset_t *opt) { case DNS_OPT_TCP_KEEPALIVE: if (!USEKEEPALIVE(client)) ns_stats_increment( - client->sctx->nsstats, - ns_statscounter_keepaliveopt); + client->sctx->nsstats, + ns_statscounter_keepaliveopt); client->attributes |= NS_CLIENTATTR_USEKEEPALIVE; isc_buffer_forward(&optbuf, optlen); @@ -2361,14 +1507,94 @@ process_opt(ns_client_t *client, dns_rdataset_t *opt) { return (result); } +void +ns__client_reset_cb(void *client0) { + ns_client_t *client = client0; + + ns_client_log(client, DNS_LOGCATEGORY_SECURITY, + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), + "reset client"); + + ns_client_endrequest(client); + if (client->tcpbuf != NULL) { + isc_mem_put(client->mctx, client->tcpbuf, + NS_CLIENT_TCP_BUFFER_SIZE); + } + + if (client->keytag != NULL) { + isc_mem_put(client->mctx, client->keytag, + client->keytag_len); + client->keytag_len = 0; + } + + client->state = NS_CLIENTSTATE_READY; + INSIST(client->recursionquota == NULL); +} + +void +ns__client_put_cb(void *client0) { + ns_client_t *client = client0; + + ns_client_log(client, DNS_LOGCATEGORY_SECURITY, + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), + "freeing client"); + + /* + * Call this first because it requires a valid client. 
+ */ + ns_query_free(client); + + client->magic = 0; + client->shuttingdown = true; + + if (client->manager != NULL) { + clientmgr_detach(&client->manager); + } + + isc_mem_put(client->mctx, client->recvbuf, NS_CLIENT_RECV_BUFFER_SIZE); + if (client->opt != NULL) { + INSIST(dns_rdataset_isassociated(client->opt)); + dns_rdataset_disassociate(client->opt); + dns_message_puttemprdataset(client->message, + &client->opt); + } + + dns_message_destroy(&client->message); + + /* + * Detaching the task must be done after unlinking from + * the manager's lists because the manager accesses + * client->task. + */ + if (client->task != NULL) { + isc_task_detach(&client->task); + } + + /* + * Destroy the fetchlock mutex that was created in + * ns_query_init(). + */ + isc_mutex_destroy(&client->query.fetchlock); + + if (client->sctx != NULL) { + ns_server_detach(&client->sctx); + } + + if (client->mctx != NULL) { + isc_mem_detach(&client->mctx); + } +} + /* * Handle an incoming request event from the socket (UDP case) * or tcpmsg (TCP case). 
*/ void -ns__client_request(isc_task_t *task, isc_event_t *event) { +ns__client_request(isc_nmhandle_t *handle, isc_region_t *region, void *arg) { ns_client_t *client; - isc_socketevent_t *sevent; + bool newclient = false; + ns_clientmgr_t *mgr; + ns_interface_t *ifp; isc_result_t result; isc_result_t sigresult = ISC_R_SUCCESS; isc_buffer_t *buffer; @@ -2386,91 +1612,70 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { #ifdef HAVE_DNSTAP dns_dtmsgtype_t dtmsgtype; #endif + ifp = (ns_interface_t *) arg; - REQUIRE(event != NULL); - client = event->ev_arg; - REQUIRE(NS_CLIENT_VALID(client)); - REQUIRE(task == client->task); + mgr = ifp->clientmgr; + REQUIRE(VALID_MANAGER(mgr)); + + client = isc_nmhandle_getdata(handle); + if (client == NULL) { + client = isc_nmhandle_getextra(handle); + + result = ns__client_setup(client, mgr, true); + if (result != ISC_R_SUCCESS) { + return; + } + + ns_client_log(client, DNS_LOGCATEGORY_SECURITY, + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), + "allocate new client"); + } else { + result = ns__client_setup(client, NULL, false); + if (result != ISC_R_SUCCESS) { + return; + } + } + + client->state = NS_CLIENTSTATE_READY; + client->dscp = ifp->dscp; + + isc_task_pause(client->task); + if (client->handle == NULL) { + isc_nmhandle_setdata(handle, client, + ns__client_reset_cb, ns__client_put_cb); + client->handle = handle; + } + if (isc_nmhandle_is_stream(handle)) { + client->attributes |= NS_CLIENTATTR_TCP; + unsigned int curr_tcpquota = + isc_quota_getused(&client->sctx->tcpquota); + ns_stats_update_if_greater(client->sctx->nsstats, + ns_statscounter_tcphighwater, + curr_tcpquota); + } INSIST(client->recursionquota == NULL); - INSIST(client->state == (TCP_CLIENT(client) ? 
- NS_CLIENTSTATE_READING : - NS_CLIENTSTATE_READY)); + INSIST(client->state == NS_CLIENTSTATE_READY); ns_client_requests++; - if (event->ev_type == ISC_SOCKEVENT_RECVDONE) { - INSIST(!TCP_CLIENT(client)); - sevent = (isc_socketevent_t *)event; - REQUIRE(sevent == client->recvevent); - isc_buffer_init(&tbuffer, sevent->region.base, sevent->n); - isc_buffer_add(&tbuffer, sevent->n); - buffer = &tbuffer; - result = sevent->result; - if (result == ISC_R_SUCCESS) { - client->peeraddr = sevent->address; - client->peeraddr_valid = true; - } - if ((sevent->attributes & ISC_SOCKEVENTATTR_DSCP) != 0) { - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(90), - "received DSCP %d", sevent->dscp); - if (client->dscp == -1) - client->dscp = sevent->dscp; - } - if ((sevent->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0) { - client->attributes |= NS_CLIENTATTR_PKTINFO; - client->pktinfo = sevent->pktinfo; - } - if ((sevent->attributes & ISC_SOCKEVENTATTR_MULTICAST) != 0) - client->attributes |= NS_CLIENTATTR_MULTICAST; - client->nrecvs--; - } else { - INSIST(TCP_CLIENT(client)); - INSIST(client->tcpconn != NULL); - REQUIRE(event->ev_type == DNS_EVENT_TCPMSG); - REQUIRE(event->ev_sender == &client->tcpmsg); - buffer = &client->tcpmsg.buffer; - result = client->tcpmsg.result; - INSIST(client->nreads == 1); - /* - * client->peeraddr was set when the connection was accepted. 
- */ - client->nreads--; - } + isc_buffer_init(&tbuffer, region->base, region->length); + isc_buffer_add(&tbuffer, region->length); + buffer = &tbuffer; + + client->peeraddr = isc_nmhandle_peeraddr(client->handle); + + client->peeraddr_valid = true; reqsize = isc_buffer_usedlength(buffer); - /* don't count the length header */ - if (TCP_CLIENT(client)) - reqsize -= 2; - if (exit_check(client)) { - return; - } - client->state = client->newstate = NS_CLIENTSTATE_WORKING; + client->state = NS_CLIENTSTATE_WORKING; - isc_task_getcurrenttimex(task, &client->requesttime); + TIME_NOW(&client->requesttime); client->tnow = client->requesttime; client->now = isc_time_seconds(&client->tnow); - if (result != ISC_R_SUCCESS) { - if (TCP_CLIENT(client)) { - ns_client_next(client, result); - } else { - if (result != ISC_R_CANCELED) - isc_log_write(ns_lctx, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, - ISC_LOG_ERROR, - "UDP client handler shutting " - "down due to fatal receive " - "error: %s", - isc_result_totext(result)); - isc_task_shutdown(client->task); - } - return; - } - isc_netaddr_fromsockaddr(&netaddr, &client->peeraddr); #if NS_CLIENT_DROPPORT @@ -2479,7 +1684,7 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { ns_client_log(client, DNS_LOGCATEGORY_SECURITY, NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10), "dropped request: suspicious port"); - ns_client_next(client, ISC_R_SUCCESS); + isc_task_unpause(client->task); return; } #endif @@ -2493,8 +1698,8 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { * Check the blackhole ACL for UDP only, since TCP is done in * client_newconn. 
*/ - env = ns_interfacemgr_getaclenv(client->interface->mgr); - if (!TCP_CLIENT(client)) { + env = ns_interfacemgr_getaclenv(client->manager->interface->mgr); + if (newclient) { if (client->sctx->blackholeacl != NULL && (dns_acl_match(&netaddr, NULL, client->sctx->blackholeacl, env, &match, NULL) == ISC_R_SUCCESS) && @@ -2503,30 +1708,18 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { ns_client_log(client, DNS_LOGCATEGORY_SECURITY, NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10), "blackholed UDP datagram"); - ns_client_next(client, ISC_R_SUCCESS); + isc_task_unpause(client->task); return; } } - /* - * Silently drop multicast requests for the present. - * XXXMPA revisit this as mDNS spec was published. - */ - if ((client->attributes & NS_CLIENTATTR_MULTICAST) != 0) { - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(2), - "dropping multicast request"); - ns_client_next(client, DNS_R_REFUSED); - return; - } - result = dns_message_peekheader(buffer, &id, &flags); if (result != ISC_R_SUCCESS) { /* * There isn't enough header to determine whether * this was a request or a response. Drop it. */ - ns_client_next(client, result); + isc_task_unpause(client->task); return; } @@ -2536,15 +1729,9 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { * If it's a TCP response, discard it here. */ if ((flags & DNS_MESSAGEFLAG_QR) != 0) { - if (TCP_CLIENT(client)) { - CTRACE("unexpected response"); - ns_client_next(client, DNS_R_FORMERR); - return; - } else { - dns_dispatch_importrecv(client->dispatch, event); - ns_client_next(client, ISC_R_SUCCESS); - return; - } + CTRACE("unexpected response"); + isc_task_unpause(client->task); + return; } /* @@ -2611,34 +1798,20 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { result = DNS_R_FORMERR; } ns_client_error(client, result); + isc_task_unpause(client->task); return; } /* - * Pipeline TCP query processing. + * Disable pipelined TCP query processing if necessary. 
*/ if (TCP_CLIENT(client) && - client->message->opcode != dns_opcode_query) + (client->message->opcode != dns_opcode_query || + (client->sctx->keepresporder != NULL && + dns_acl_allowed(&netaddr, NULL, + client->sctx->keepresporder, env)))) { - client->tcpconn->pipelined = false; - } - if (TCP_CLIENT(client) && client->tcpconn->pipelined) { - /* - * We're pipelining. Replace the client; the - * replacement can read the TCP socket looking - * for new messages and this one can process the - * current message asynchronously. - * - * There will now be at least three clients using this - * TCP socket - one accepting new connections, - * one reading an existing connection to get new - * messages, and one answering the message already - * received. - */ - result = ns_client_replace(client); - if (result != ISC_R_SUCCESS) { - client->tcpconn->pipelined = false; - } + isc_nm_tcpdns_sequential(handle); } dns_opcodestats_increment(client->sctx->opcodestats, @@ -2657,17 +1830,14 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { client->message->rcode = dns_rcode_noerror; - /* RFC1123 section 6.1.3.2 */ - if ((client->attributes & NS_CLIENTATTR_MULTICAST) != 0) - client->message->flags &= ~DNS_MESSAGEFLAG_RD; - /* * Deal with EDNS. 
*/ - if ((client->sctx->options & NS_SERVER_NOEDNS) != 0) + if ((client->sctx->options & NS_SERVER_NOEDNS) != 0) { opt = NULL; - else + } else { opt = dns_message_getopt(client->message); + } client->ecs.source = 0; client->ecs.scope = 0; @@ -2679,6 +1849,7 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { */ if ((client->sctx->options & NS_SERVER_EDNSFORMERR) != 0) { ns_client_error(client, DNS_R_FORMERR); + isc_task_unpause(client->task); return; } @@ -2687,6 +1858,7 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { */ if ((client->sctx->options & NS_SERVER_EDNSNOTIMP) != 0) { ns_client_error(client, DNS_R_NOTIMP); + isc_task_unpause(client->task); return; } @@ -2695,6 +1867,7 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { */ if ((client->sctx->options & NS_SERVER_EDNSREFUSED) != 0) { ns_client_error(client, DNS_R_REFUSED); + isc_task_unpause(client->task); return; } @@ -2702,13 +1875,16 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { * Are we dropping all EDNS queries? 
*/ if ((client->sctx->options & NS_SERVER_DROPEDNS) != 0) { - ns_client_next(client, ISC_R_SUCCESS); + ns_client_drop(client, ISC_R_SUCCESS); + isc_task_unpause(client->task); return; } result = process_opt(client, opt); - if (result != ISC_R_SUCCESS) + if (result != ISC_R_SUCCESS) { + isc_task_unpause(client->task); return; + } } if (client->message->rdclass == 0) { @@ -2719,19 +1895,26 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { result = dns_message_reply(client->message, true); if (result != ISC_R_SUCCESS) { ns_client_error(client, result); + isc_task_unpause(client->task); return; } - if (notimp) + + if (notimp) { client->message->rcode = dns_rcode_notimp; + } + ns_client_send(client); + isc_task_unpause(client->task); return; } + ns_client_log(client, NS_LOGCATEGORY_CLIENT, NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(1), "message class could not be determined"); ns_client_dumpmessage(client, "message class could not be determined"); ns_client_error(client, notimp ? DNS_R_NOTIMP : DNS_R_FORMERR); + isc_task_unpause(client->task); return; } @@ -2743,47 +1926,17 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { * receiving socket (this needs a system call and can be heavy). * For IPv6 UDP queries, we get this from the pktinfo structure (if * supported). + * * If all the attempts fail (this can happen due to memory shortage, * etc), we regard this as an error for safety. 
*/ - if ((client->interface->flags & NS_INTERFACEFLAG_ANYADDR) == 0) + if ((client->manager->interface->flags & NS_INTERFACEFLAG_ANYADDR) == 0) isc_netaddr_fromsockaddr(&client->destaddr, - &client->interface->addr); + &client->manager->interface->addr); else { - isc_sockaddr_t sockaddr; - result = ISC_R_FAILURE; - - if (TCP_CLIENT(client)) - result = isc_socket_getsockname(client->tcpsocket, - &sockaddr); - if (result == ISC_R_SUCCESS) - isc_netaddr_fromsockaddr(&client->destaddr, &sockaddr); - if (result != ISC_R_SUCCESS && - client->interface->addr.type.sa.sa_family == AF_INET6 && - (client->attributes & NS_CLIENTATTR_PKTINFO) != 0) { - /* - * XXXJT technically, we should convert the receiving - * interface ID to a proper scope zone ID. However, - * due to the fact there is no standard API for this, - * we only handle link-local addresses and use the - * interface index as link ID. Despite the assumption, - * it should cover most typical cases. - */ - isc_netaddr_fromin6(&client->destaddr, - &client->pktinfo.ipi6_addr); - if (IN6_IS_ADDR_LINKLOCAL(&client->pktinfo.ipi6_addr)) - isc_netaddr_setzone(&client->destaddr, - client->pktinfo.ipi6_ifindex); - result = ISC_R_SUCCESS; - } - if (result != ISC_R_SUCCESS) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "failed to get request's " - "destination: %s", - isc_result_totext(result)); - ns_client_next(client, ISC_R_SUCCESS); - return; - } + isc_sockaddr_t sockaddr = + isc_nmhandle_localaddr(client->handle); + isc_netaddr_fromsockaddr(&client->destaddr, &sockaddr); } isc_sockaddr_fromnetaddr(&client->destsockaddr, &client->destaddr, 0); @@ -2816,6 +1969,7 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { "no matching view in class '%s'", classname); ns_client_dumpmessage(client, "no matching view in class"); ns_client_error(client, notimp ? 
DNS_R_NOTIMP : DNS_R_REFUSED); + isc_task_unpause(client->task); return; } @@ -2908,14 +2062,17 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { "request has invalid signature: %s (%s)", isc_result_totext(result), tsigrcode); } + /* * Accept update messages signed by unknown keys so that * update forwarding works transparently through slaves * that don't have all the same keys as the master. */ if (!(client->message->tsigstatus == dns_tsigerror_badkey && - client->message->opcode == dns_opcode_update)) { + client->message->opcode == dns_opcode_update)) + { ns_client_error(client, sigresult); + isc_task_unpause(client->task); return; } } @@ -2942,7 +2099,9 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { ns_client_checkaclsilent(client, &client->destaddr, client->view->cacheonacl, true) == ISC_R_SUCCESS) + { ra = true; + } if (ra == true) { client->attributes |= NS_CLIENTATTR_RA; @@ -2984,6 +2143,7 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { &client->requesttime, NULL, buffer); #endif /* HAVE_DNSTAP */ + isc_nmhandle_ref(client->handle); ns_query_start(client); break; case dns_opcode_update: @@ -2994,11 +2154,13 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { &client->requesttime, NULL, buffer); #endif /* HAVE_DNSTAP */ ns_client_settimeout(client, 60); + isc_nmhandle_ref(client->handle); ns_update_start(client, sigresult); break; case dns_opcode_notify: CTRACE("notify"); ns_client_settimeout(client, 60); + isc_nmhandle_ref(client->handle); ns_notify_start(client); break; case dns_opcode_iquery: @@ -3009,82 +2171,48 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { CTRACE("unknown opcode"); ns_client_error(client, DNS_R_NOTIMP); } + + isc_task_unpause(client->task); } static void -client_timeout(isc_task_t *task, isc_event_t *event) { - ns_client_t *client; - - REQUIRE(event != NULL); - REQUIRE(event->ev_type == ISC_TIMEREVENT_LIFE || - event->ev_type == ISC_TIMEREVENT_IDLE); - client = 
event->ev_arg; - REQUIRE(NS_CLIENT_VALID(client)); - REQUIRE(task == client->task); - REQUIRE(client->timer != NULL); - - UNUSED(task); - - CTRACE("timeout"); - - isc_event_free(&event); - - if (client->shutdown != NULL) { - (client->shutdown)(client->shutdown_arg, ISC_R_TIMEDOUT); - client->shutdown = NULL; - client->shutdown_arg = NULL; - } - - if (client->newstate > NS_CLIENTSTATE_READY) - client->newstate = NS_CLIENTSTATE_READY; - (void)exit_check(client); -} - -static isc_result_t get_clientmctx(ns_clientmgr_t *manager, isc_mem_t **mctxp) { isc_mem_t *clientmctx; -#if NMCTXS > 0 +#if CLIENT_NMCTXS > 0 unsigned int nextmctx; #endif MTRACE("clientmctx"); - /* - * Caller must be holding the manager lock. - */ - if ((manager->sctx->options & NS_SERVER_CLIENTTEST) != 0) { - isc_mem_create(mctxp); - isc_mem_setname(*mctxp, "client", NULL); - return (ISC_R_SUCCESS); - } -#if NMCTXS > 0 - nextmctx = manager->nextmctx++; - if (manager->nextmctx == NMCTXS) - manager->nextmctx = 0; +#if CLIENT_NMCTXS > 0 + LOCK(&manager->lock); + if (isc_nm_tid()>=0) { + nextmctx = isc_nm_tid(); + } else { + nextmctx = manager->nextmctx++; + if (manager->nextmctx == CLIENT_NMCTXS) + manager->nextmctx = 0; - INSIST(nextmctx < NMCTXS); + INSIST(nextmctx < CLIENT_NMCTXS); + } clientmctx = manager->mctxpool[nextmctx]; if (clientmctx == NULL) { isc_mem_create(&clientmctx); isc_mem_setname(clientmctx, "client", NULL); - manager->mctxpool[nextmctx] = clientmctx; } + UNLOCK(&manager->lock); #else clientmctx = manager->mctx; #endif isc_mem_attach(clientmctx, mctxp); - - return (ISC_R_SUCCESS); } -static isc_result_t -client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { - ns_client_t *client; +isc_result_t +ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) { isc_result_t result; - isc_mem_t *mctx = NULL; /* * Caller must be holding the manager lock. 
@@ -3094,560 +2222,191 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { * The caller is responsible for that. */ - REQUIRE(clientp != NULL && *clientp == NULL); + REQUIRE(NS_CLIENT_VALID(client) || (new && client != NULL)); + REQUIRE(VALID_MANAGER(mgr) || !new); - result = get_clientmctx(manager, &mctx); - if (result != ISC_R_SUCCESS) - return (result); + if (new) { + *client = (ns_client_t) { + .magic = 0 + }; - client = isc_mem_get(mctx, sizeof(*client)); - client->mctx = mctx; + get_clientmctx(mgr, &client->mctx); + clientmgr_attach(mgr, &client->manager); + ns_server_attach(mgr->sctx, &client->sctx); + result = isc_task_create(mgr->taskmgr, 20, &client->task); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + result = dns_message_create(client->mctx, + DNS_MESSAGE_INTENTPARSE, + &client->message); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } - client->sctx = NULL; - ns_server_attach(manager->sctx, &client->sctx); - client->task = NULL; - result = isc_task_create(manager->taskmgr, 0, &client->task); - if (result != ISC_R_SUCCESS) - goto cleanup_client; - isc_task_setname(client->task, "client", client); + client->recvbuf = isc_mem_get(client->mctx, + NS_CLIENT_RECV_BUFFER_SIZE); + /* + * Set magic earlier than usual because ns_query_init() + * and the functions it calls will require it. 
+ */ + client->magic = NS_CLIENT_MAGIC; + result = ns_query_init(client); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + } else { + ns_clientmgr_t *oldmgr = client->manager; + ns_server_t *sctx = client->sctx; + isc_task_t *task = client->task; + unsigned char *recvbuf = client->recvbuf; + dns_message_t *message = client->message; + isc_mem_t *oldmctx = client->mctx; + ns_query_t query = client->query; - client->timer = NULL; - result = isc_timer_create(manager->timermgr, isc_timertype_inactive, - NULL, NULL, client->task, client_timeout, - client, &client->timer); - if (result != ISC_R_SUCCESS) - goto cleanup_task; - client->timerset = false; - - client->delaytimer = NULL; - - client->message = NULL; - result = dns_message_create(client->mctx, DNS_MESSAGE_INTENTPARSE, - &client->message); - if (result != ISC_R_SUCCESS) - goto cleanup_timer; - - /* XXXRTH Hardwired constants */ - - client->sendevent = isc_socket_socketevent(client->mctx, client, - ISC_SOCKEVENT_SENDDONE, - client_senddone, client); - if (client->sendevent == NULL) { - result = ISC_R_NOMEMORY; - goto cleanup_message; + *client = (ns_client_t) { + .magic = 0, + .mctx = oldmctx, + .manager = oldmgr, + .sctx = sctx, + .task = task, + .recvbuf = recvbuf, + .message = message, + .query = query + }; } - client->recvbuf = isc_mem_get(client->mctx, RECV_BUFFER_SIZE); - - client->recvevent = isc_socket_socketevent(client->mctx, client, - ISC_SOCKEVENT_RECVDONE, - ns__client_request, client); - if (client->recvevent == NULL) { - result = ISC_R_NOMEMORY; - goto cleanup_recvbuf; - } - - client->magic = NS_CLIENT_MAGIC; - client->manager = NULL; client->state = NS_CLIENTSTATE_INACTIVE; - client->newstate = NS_CLIENTSTATE_MAX; - client->naccepts = 0; - client->nreads = 0; - client->nsends = 0; - client->nrecvs = 0; - client->nupdates = 0; - client->nctls = 0; - isc_refcount_init(&client->references, 0); - client->attributes = 0; - client->view = NULL; - client->dispatch = NULL; - client->udpsocket = NULL; 
- client->tcplistener = NULL; - client->tcpsocket = NULL; - client->tcpmsg_valid = false; - client->tcpbuf = NULL; - client->opt = NULL; client->udpsize = 512; client->dscp = -1; - client->extflags = 0; client->ednsversion = -1; - client->next = NULL; - client->shutdown = NULL; - client->shutdown_arg = NULL; - client->signer = NULL; dns_name_init(&client->signername, NULL); - client->mortal = false; - client->sendcb = NULL; - client->tcpconn = NULL; - client->recursionquota = NULL; - client->interface = NULL; - client->peeraddr_valid = false; dns_ecs_init(&client->ecs); - client->needshutdown = ((client->sctx->options & - NS_SERVER_CLIENTTEST) != 0); - client->tcpactive = false; - - ISC_EVENT_INIT(&client->ctlevent, sizeof(client->ctlevent), 0, NULL, - NS_EVENT_CLIENTCONTROL, client_start, client, client, - NULL, NULL); - /* - * Initialize FORMERR cache to sentinel value that will not match - * any actual FORMERR response. - */ isc_sockaddr_any(&client->formerrcache.addr); client->formerrcache.time = 0; client->formerrcache.id = 0; - ISC_LINK_INIT(client, link); ISC_LINK_INIT(client, rlink); - ISC_QLINK_INIT(client, ilink); - client->keytag = NULL; - client->keytag_len = 0; client->rcode_override = -1; /* not set */ - /* - * We call the init routines for the various kinds of client here, - * after we have created an otherwise valid client, because some - * of them call routines that REQUIRE(NS_CLIENT_VALID(client)). 
- */ - result = ns_query_init(client); - if (result != ISC_R_SUCCESS) - goto cleanup_recvevent; + client->magic = NS_CLIENT_MAGIC; - result = isc_task_onshutdown(client->task, client_shutdown, client); - if (result != ISC_R_SUCCESS) - goto cleanup_query; - - CTRACE("create"); - - *clientp = client; + CTRACE("client_setup"); return (ISC_R_SUCCESS); - cleanup_query: - ns_query_free(client); + cleanup: + if (client->recvbuf != NULL) { + isc_mem_put(client->mctx, client->recvbuf, + NS_CLIENT_RECV_BUFFER_SIZE); + } - cleanup_recvevent: - isc_event_free((isc_event_t **)&client->recvevent); + if (client->message != NULL) { + dns_message_destroy(&client->message); + } - cleanup_recvbuf: - isc_mem_put(client->mctx, client->recvbuf, RECV_BUFFER_SIZE); - - isc_event_free((isc_event_t **)&client->sendevent); - - client->magic = 0; - - cleanup_message: - dns_message_destroy(&client->message); - - cleanup_timer: - isc_timer_detach(&client->timer); - - cleanup_task: - isc_task_detach(&client->task); - - cleanup_client: - if (client->sctx != NULL) - ns_server_detach(&client->sctx); - isc_mem_putanddetach(&client->mctx, client, sizeof(*client)); + if (client->task != NULL) { + isc_task_detach(&client->task); + } return (result); } -static void -client_read(ns_client_t *client, bool newconn) { - isc_result_t result; - - CTRACE("read"); - - result = dns_tcpmsg_readmessage(&client->tcpmsg, client->task, - ns__client_request, client); - if (result != ISC_R_SUCCESS) - goto fail; - - /* - * Set a timeout to limit the amount of time we will wait - * for a request on this TCP connection. 
- */ - read_settimeout(client, newconn); - - client->state = client->newstate = NS_CLIENTSTATE_READING; - INSIST(client->nreads == 0); - INSIST(client->recursionquota == NULL); - client->nreads++; - - return; - fail: - ns_client_next(client, result); -} - -static void -client_newconn(isc_task_t *task, isc_event_t *event) { - isc_result_t result; - ns_client_t *client = event->ev_arg; - isc_socket_newconnev_t *nevent = (isc_socket_newconnev_t *)event; - dns_aclenv_t *env; - uint32_t old; - - REQUIRE(event->ev_type == ISC_SOCKEVENT_NEWCONN); - REQUIRE(NS_CLIENT_VALID(client)); - REQUIRE(client->task == task); - - env = ns_interfacemgr_getaclenv(client->interface->mgr); - - UNUSED(task); - - INSIST(client->state == NS_CLIENTSTATE_READY); - - /* - * The accept() was successful and we're now establishing a new - * connection. We need to make note of it in the client and - * interface objects so client objects can do the right thing - * when going inactive in exit_check() (see comments in - * client_accept() for details). - */ - INSIST(client->naccepts == 1); - client->naccepts--; - - old = isc_refcount_decrement(&client->interface->ntcpaccepting); - INSIST(old > 0); - - /* - * We must take ownership of the new socket before the exit - * check to make sure it gets destroyed if we decide to exit. - */ - if (nevent->result == ISC_R_SUCCESS) { - client->tcpsocket = nevent->newsocket; - isc_socket_setname(client->tcpsocket, "client-tcp", NULL); - client->state = NS_CLIENTSTATE_READING; - INSIST(client->recursionquota == NULL); - - (void)isc_socket_getpeername(client->tcpsocket, - &client->peeraddr); - client->peeraddr_valid = true; - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), - "new TCP connection"); - } else { - /* - * XXXRTH What should we do? We're trying to accept but - * it didn't work. If we just give up, then TCP - * service may eventually stop. - * - * For now, we just go idle. 
- * - * Going idle is probably the right thing if the - * I/O was canceled. - */ - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), - "accept failed: %s", - isc_result_totext(nevent->result)); - tcpconn_detach(client); - } - - if (exit_check(client)) - goto freeevent; - - if (nevent->result == ISC_R_SUCCESS) { - int match; - isc_netaddr_t netaddr; - - isc_netaddr_fromsockaddr(&netaddr, &client->peeraddr); - - if (client->sctx->blackholeacl != NULL && - (dns_acl_match(&netaddr, NULL, client->sctx->blackholeacl, - env, &match, NULL) == ISC_R_SUCCESS) && - match > 0) - { - ns_client_log(client, DNS_LOGCATEGORY_SECURITY, - NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10), - "blackholed connection attempt"); - client->newstate = NS_CLIENTSTATE_READY; - (void)exit_check(client); - goto freeevent; - } - - INSIST(client->tcpmsg_valid == false); - dns_tcpmsg_init(client->mctx, client->tcpsocket, - &client->tcpmsg); - client->tcpmsg_valid = true; - - /* - * Let a new client take our place immediately, before - * we wait for a request packet. If we don't, - * telnetting to port 53 (once per CPU) will - * deny service to legitimate TCP clients. - */ - result = ns_client_replace(client); - if (result == ISC_R_SUCCESS && - (client->sctx->keepresporder == NULL || - !dns_acl_allowed(&netaddr, NULL, - client->sctx->keepresporder, env))) - { - client->tcpconn->pipelined = true; - } - - client_read(client, true); - } - - freeevent: - isc_event_free(&event); -} - -static void -client_accept(ns_client_t *client) { - isc_result_t result; - - CTRACE("accept"); - /* - * Set up a new TCP connection. This means try to attach to the - * TCP client quota (tcp-clients), but fail if we're over quota. 
- */ - result = tcpconn_init(client, false); - if (result != ISC_R_SUCCESS) { - bool exit; - - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_WARNING, - "TCP client quota reached: %s", - isc_result_totext(result)); - - /* - * We have exceeded the system-wide TCP client quota. But, - * we can't just block this accept in all cases, because if - * we did, a heavy TCP load on other interfaces might cause - * this interface to be starved, with no clients able to - * accept new connections. - * - * So, we check here to see if any other clients are - * already servicing TCP queries on this interface (whether - * accepting, reading, or processing). If we find that at - * least one client other than this one is active, then - * it's okay *not* to call accept - we can let this - * client go inactive and another will take over when it's - * done. - * - * If there aren't enough active clients on the interface, - * then we can be a little bit flexible about the quota. - * We'll allow *one* extra client through to ensure we're - * listening on every interface; we do this by setting the - * 'force' option to tcpconn_init(). - * - * (Note: In practice this means that the real TCP client - * quota is tcp-clients plus the number of listening - * interfaces plus 1.) - */ - exit = (isc_refcount_current(&client->interface->ntcpactive) > - (client->tcpactive ? 1U : 0U)); - if (exit) { - client->newstate = NS_CLIENTSTATE_INACTIVE; - (void)exit_check(client); - return; - } - - result = tcpconn_init(client, true); - RUNTIME_CHECK(result == ISC_R_SUCCESS); - } - - /* TCP high-water stats update. */ - unsigned int curr_tcpquota = isc_quota_getused(&client->sctx->tcpquota); - ns_stats_update_if_greater(client->sctx->nsstats, - ns_statscounter_tcphighwater, - curr_tcpquota); - - /* - * If this client was set up using get_client() or get_worker(), - * then TCP is already marked active. 
However, if it was restarted - * from exit_check(), it might not be, so we take care of it now. - */ - mark_tcp_active(client, true); - - result = isc_socket_accept(client->tcplistener, client->task, - client_newconn, client); - if (result != ISC_R_SUCCESS) { - /* - * XXXRTH What should we do? We're trying to accept but - * it didn't work. If we just give up, then TCP - * service may eventually stop. - * - * For now, we just go idle. - */ - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_socket_accept() failed: %s", - isc_result_totext(result)); - - tcpconn_detach(client); - mark_tcp_active(client, false); - return; - } - - /* - * The client's 'naccepts' counter indicates that this client has - * called accept() and is waiting for a new connection. It should - * never exceed 1. - */ - INSIST(client->naccepts == 0); - client->naccepts++; - - /* - * The interface's 'ntcpaccepting' counter is incremented when - * any client calls accept(), and decremented in client_newconn() - * once the connection is established. - * - * When the client object is shutting down after handling a TCP - * request (see exit_check()), if this value is at least one, that - * means another client has called accept() and is waiting to - * establish the next connection. That means the client may be - * be free to become inactive; otherwise it may need to start - * listening for connections itself to prevent the interface - * going dead. 
- */ - isc_refcount_increment0(&client->interface->ntcpaccepting); -} - -static void -client_udprecv(ns_client_t *client) { - isc_result_t result; - isc_region_t r; - - CTRACE("udprecv"); - - r.base = client->recvbuf; - r.length = RECV_BUFFER_SIZE; - result = isc_socket_recv2(client->udpsocket, &r, 1, - client->task, client->recvevent, 0); - if (result != ISC_R_SUCCESS) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_socket_recv2() failed: %s", - isc_result_totext(result)); - /* - * This cannot happen in the current implementation, since - * isc_socket_recv2() cannot fail if flags == 0. - * - * If this does fail, we just go idle. - */ - return; - } - INSIST(client->nrecvs == 0); - client->nrecvs++; -} - -void -ns_client_attach(ns_client_t *source, ns_client_t **targetp) { - uint32_t oldrefs; - REQUIRE(NS_CLIENT_VALID(source)); - REQUIRE(targetp != NULL && *targetp == NULL); - - oldrefs = isc_refcount_increment(&source->references); - ns_client_log(source, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10), - "ns_client_attach: ref = %d", oldrefs+1); - *targetp = source; -} - -void -ns_client_detach(ns_client_t **clientp) { - int32_t oldrefs; - ns_client_t *client = *clientp; - oldrefs = isc_refcount_decrement(&client->references); - INSIST(oldrefs > 0); - - *clientp = NULL; - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10), - "ns_client_detach: ref = %d", oldrefs-1); - (void)exit_check(client); -} - bool ns_client_shuttingdown(ns_client_t *client) { - return (client->newstate == NS_CLIENTSTATE_FREED); -} - -isc_result_t -ns_client_replace(ns_client_t *client) { - isc_result_t result; - bool tcp; - - CTRACE("replace"); - - REQUIRE(client != NULL); - REQUIRE(client->manager != NULL); - - tcp = TCP_CLIENT(client); - if (tcp && client->tcpconn != NULL && client->tcpconn->pipelined) { - result = get_worker(client->manager, client->interface, - client->tcpsocket, client); - } else { - result = get_client(client->manager, 
client->interface, - client->dispatch, tcp); - - } - if (result != ISC_R_SUCCESS) { - return (result); - } - - /* - * The responsibility for listening for new requests is hereby - * transferred to the new client. Therefore, the old client - * should refrain from listening for any more requests. - */ - client->mortal = true; - - return (ISC_R_SUCCESS); + return (client->shuttingdown); } /*** *** Client Manager ***/ +static void +clientmgr_attach(ns_clientmgr_t *source, ns_clientmgr_t **targetp) { + int32_t oldrefs; + + REQUIRE(VALID_MANAGER(source)); + REQUIRE(targetp != NULL && *targetp == NULL); + + oldrefs = isc_refcount_increment(&source->references); + isc_log_write(ns_lctx, NS_LOGCATEGORY_CLIENT, + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), + "clientmgr @%p attach: %d", source, oldrefs + 1); + + *targetp = source; +} + +static void +clientmgr_detach(ns_clientmgr_t **mp) { + ns_clientmgr_t *mgr = *mp; + int32_t oldrefs; + oldrefs = isc_refcount_decrement(&mgr->references); + INSIST(oldrefs > 0); + + isc_log_write(ns_lctx, NS_LOGCATEGORY_CLIENT, + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), + "clientmgr @%p detach: %d", mgr, oldrefs - 1); + if (oldrefs == 1) { + clientmgr_destroy(mgr); + } + + *mp = NULL; +} + static void clientmgr_destroy(ns_clientmgr_t *manager) { -#if NMCTXS > 0 +#if CLIENT_NMCTXS > 0 int i; #endif - REQUIRE(ISC_LIST_EMPTY(manager->clients)); - MTRACE("clientmgr_destroy"); -#if NMCTXS > 0 - for (i = 0; i < NMCTXS; i++) { + manager->magic = 0; + +#if CLIENT_NMCTXS > 0 + for (i = 0; i < CLIENT_NMCTXS; i++) { if (manager->mctxpool[i] != NULL) isc_mem_detach(&manager->mctxpool[i]); } #endif - ISC_QUEUE_DESTROY(manager->inactive); + if (manager->interface != NULL) { + ns_interface_detach(&manager->interface); + } isc_mutex_destroy(&manager->lock); - isc_mutex_destroy(&manager->listlock); isc_mutex_destroy(&manager->reclock); if (manager->excl != NULL) isc_task_detach(&manager->excl); + for (i = 0; i < CLIENT_NTASKS; i++) { + if (manager->taskpool[i] != 
NULL) { + isc_task_detach(&manager->taskpool[i]); + } + } + isc_mem_put(manager->mctx, manager->taskpool, + CLIENT_NTASKS * sizeof(isc_task_t *)); ns_server_detach(&manager->sctx); - manager->magic = 0; isc_mem_put(manager->mctx, manager, sizeof(*manager)); } isc_result_t ns_clientmgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, - isc_timermgr_t *timermgr, ns_clientmgr_t **managerp) + isc_timermgr_t *timermgr, ns_interface_t *interface, + ns_clientmgr_t **managerp) { ns_clientmgr_t *manager; isc_result_t result; -#if NMCTXS > 0 +#if CLIENT_NMCTXS > 0 int i; #endif manager = isc_mem_get(mctx, sizeof(*manager)); + *manager = (ns_clientmgr_t) { .magic = 0 }; isc_mutex_init(&manager->lock); - isc_mutex_init(&manager->listlock); isc_mutex_init(&manager->reclock); manager->excl = NULL; @@ -3659,17 +2418,24 @@ ns_clientmgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, manager->mctx = mctx; manager->taskmgr = taskmgr; manager->timermgr = timermgr; - manager->exiting = false; + ns_interface_attach(interface, &manager->interface); + + manager->exiting = false; + manager->taskpool = + isc_mem_get(mctx, CLIENT_NTASKS*sizeof(isc_task_t *)); + for (i = 0; i < CLIENT_NTASKS; i++) { + manager->taskpool[i] = NULL; + isc_task_create(manager->taskmgr, 20, &manager->taskpool[i]); + } + isc_refcount_init(&manager->references, 1); manager->sctx = NULL; ns_server_attach(sctx, &manager->sctx); - ISC_LIST_INIT(manager->clients); ISC_LIST_INIT(manager->recursing); - ISC_QUEUE_INIT(manager->inactive, ilink); -#if NMCTXS > 0 +#if CLIENT_NMCTXS > 0 manager->nextmctx = 0; - for (i = 0; i < NMCTXS; i++) + for (i = 0; i < CLIENT_NMCTXS; i++) manager->mctxpool[i] = NULL; /* will be created on-demand */ #endif manager->magic = MANAGER_MAGIC; @@ -3682,7 +2448,6 @@ ns_clientmgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, cleanup_reclock: isc_mutex_destroy(&manager->reclock); - isc_mutex_destroy(&manager->listlock); 
isc_mutex_destroy(&manager->lock); isc_mem_put(manager->mctx, manager, sizeof(*manager)); @@ -3694,8 +2459,8 @@ void ns_clientmgr_destroy(ns_clientmgr_t **managerp) { isc_result_t result; ns_clientmgr_t *manager; - ns_client_t *client; - bool need_destroy = false, unlock = false; + bool unlock = false; + int32_t oldrefs; REQUIRE(managerp != NULL); manager = *managerp; @@ -3703,248 +2468,31 @@ ns_clientmgr_destroy(ns_clientmgr_t **managerp) { MTRACE("destroy"); + /* XXXWPK TODO we need to pause netmgr here */ /* * Check for success because we may already be task-exclusive * at this point. Only if we succeed at obtaining an exclusive * lock now will we need to relinquish it later. */ result = isc_task_beginexclusive(manager->excl); - if (result == ISC_R_SUCCESS) + if (result == ISC_R_SUCCESS) { unlock = true; + } manager->exiting = true; - for (client = ISC_LIST_HEAD(manager->clients); - client != NULL; - client = ISC_LIST_NEXT(client, link)) - isc_task_shutdown(client->task); - - if (ISC_LIST_EMPTY(manager->clients)) - need_destroy = true; - - if (unlock) + if (unlock) { isc_task_endexclusive(manager->excl); + } - if (need_destroy) + oldrefs = isc_refcount_decrement(&manager->references); + if (oldrefs == 1) { clientmgr_destroy(manager); + } *managerp = NULL; } -static isc_result_t -get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, - dns_dispatch_t *disp, bool tcp) -{ - isc_result_t result = ISC_R_SUCCESS; - isc_event_t *ev; - ns_client_t *client; - MTRACE("get client"); - - REQUIRE(manager != NULL); - - if (manager->exiting) - return (ISC_R_SHUTTINGDOWN); - - /* - * Allocate a client. First try to get a recycled one; - * if that fails, make a new one. 
- */ - client = NULL; - if ((manager->sctx->options & NS_SERVER_CLIENTTEST) == 0) { - ISC_QUEUE_POP(manager->inactive, ilink, client); - } - - if (client != NULL) { - MTRACE("recycle"); - } else { - MTRACE("create new"); - - LOCK(&manager->lock); - result = client_create(manager, &client); - UNLOCK(&manager->lock); - if (result != ISC_R_SUCCESS) - return (result); - - LOCK(&manager->listlock); - ISC_LIST_APPEND(manager->clients, client, link); - UNLOCK(&manager->listlock); - } - - client->manager = manager; - ns_interface_attach(ifp, &client->interface); - client->state = NS_CLIENTSTATE_READY; - client->sctx = manager->sctx; - INSIST(client->recursionquota == NULL); - - client->dscp = ifp->dscp; - client->rcode_override = -1; /* not set */ - - if (tcp) { - mark_tcp_active(client, true); - - client->attributes |= NS_CLIENTATTR_TCP; - isc_socket_attach(ifp->tcpsocket, - &client->tcplistener); - - } else { - isc_socket_t *sock; - - dns_dispatch_attach(disp, &client->dispatch); - sock = dns_dispatch_getsocket(client->dispatch); - isc_socket_attach(sock, &client->udpsocket); - } - - INSIST(client->nctls == 0); - client->nctls++; - ev = &client->ctlevent; - isc_task_send(client->task, &ev); - - return (result); -} - -static isc_result_t -get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, - ns_client_t *oldclient) -{ - isc_result_t result = ISC_R_SUCCESS; - isc_event_t *ev; - ns_client_t *client; - MTRACE("get worker"); - - REQUIRE(manager != NULL); - REQUIRE(oldclient != NULL); - - if (manager->exiting) - return (ISC_R_SHUTTINGDOWN); - - /* - * Allocate a client. First try to get a recycled one; - * if that fails, make a new one. 
- */ - client = NULL; - if ((manager->sctx->options & NS_SERVER_CLIENTTEST) == 0) - ISC_QUEUE_POP(manager->inactive, ilink, client); - - if (client != NULL) - MTRACE("recycle"); - else { - MTRACE("create new"); - - LOCK(&manager->lock); - result = client_create(manager, &client); - UNLOCK(&manager->lock); - if (result != ISC_R_SUCCESS) - return (result); - - LOCK(&manager->listlock); - ISC_LIST_APPEND(manager->clients, client, link); - UNLOCK(&manager->listlock); - } - - client->manager = manager; - ns_interface_attach(ifp, &client->interface); - client->newstate = client->state = NS_CLIENTSTATE_WORKING; - INSIST(client->recursionquota == NULL); - client->sctx = manager->sctx; - - client->dscp = ifp->dscp; - - client->attributes |= NS_CLIENTATTR_TCP; - client->mortal = true; - client->sendcb = NULL; - client->rcode_override = -1; /* not set */ - - tcpconn_attach(oldclient, client); - mark_tcp_active(client, true); - - isc_socket_attach(ifp->tcpsocket, &client->tcplistener); - isc_socket_attach(sock, &client->tcpsocket); - isc_socket_setname(client->tcpsocket, "worker-tcp", NULL); - (void)isc_socket_getpeername(client->tcpsocket, &client->peeraddr); - client->peeraddr_valid = true; - - INSIST(client->tcpmsg_valid == false); - dns_tcpmsg_init(client->mctx, client->tcpsocket, &client->tcpmsg); - client->tcpmsg_valid = true; - - INSIST(client->nctls == 0); - client->nctls++; - ev = &client->ctlevent; - isc_task_send(client->task, &ev); - - return (result); -} - -isc_result_t -ns__clientmgr_getclient(ns_clientmgr_t *manager, ns_interface_t *ifp, - bool tcp, ns_client_t **clientp) -{ - isc_result_t result = ISC_R_SUCCESS; - ns_client_t *client; - MTRACE("getclient"); - - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(clientp != NULL && *clientp == NULL); - - if (manager->exiting) - return (ISC_R_SHUTTINGDOWN); - - client = NULL; - ISC_QUEUE_POP(manager->inactive, ilink, client); - if (client != NULL) - MTRACE("getclient (recycle)"); - else { - MTRACE("getclient (create)"); - 
- LOCK(&manager->lock); - result = client_create(manager, &client); - UNLOCK(&manager->lock); - if (result != ISC_R_SUCCESS) - return (result); - - LOCK(&manager->listlock); - ISC_LIST_APPEND(manager->clients, client, link); - UNLOCK(&manager->listlock); - } - - client->manager = manager; - ns_interface_attach(ifp, &client->interface); - client->state = NS_CLIENTSTATE_READY; - INSIST(client->recursionquota == NULL); - - client->dscp = ifp->dscp; - isc_refcount_increment(&client->references); - - if (tcp) { - client->attributes |= NS_CLIENTATTR_TCP; - } - - *clientp = client; - - return (result); -} - -isc_result_t -ns_clientmgr_createclients(ns_clientmgr_t *manager, unsigned int n, - ns_interface_t *ifp, bool tcp) -{ - isc_result_t result = ISC_R_SUCCESS; - unsigned int disp; - - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(n > 0); - - MTRACE("createclients"); - - for (disp = 0; disp < n; disp++) { - result = get_client(manager, ifp, ifp->udpdispatch[disp], tcp); - if (result != ISC_R_SUCCESS) - break; - } - - return (result); -} - isc_sockaddr_t * ns_client_getsockaddr(ns_client_t *client) { return (&client->peeraddr); @@ -3960,7 +2508,8 @@ ns_client_checkaclsilent(ns_client_t *client, isc_netaddr_t *netaddr, dns_acl_t *acl, bool default_allow) { isc_result_t result; - dns_aclenv_t *env = ns_interfacemgr_getaclenv(client->interface->mgr); + dns_aclenv_t *env = + ns_interfacemgr_getaclenv(client->manager->interface->mgr); isc_netaddr_t tmpnetaddr; int match; diff --git a/lib/ns/include/ns/client.h b/lib/ns/include/ns/client.h index 02531f2118..613a91f822 100644 --- a/lib/ns/include/ns/client.h +++ b/lib/ns/include/ns/client.h @@ -36,10 +36,10 @@ * notified of this by calling one of the following functions * exactly once in the context of its task: * \code - * ns_client_send() (sending a non-error response) + * ns_client_send() (sending a non-error response) * ns_client_sendraw() (sending a raw response) - * ns_client_error() (sending an error response) - * 
ns_client_next() (sending no response) + * ns_client_error() (sending an error response) + * ns_client_drop() (sending no response, logging the reason) *\endcode * This will release any resources used by the request and * and allow the ns_client_t to listen for the next request. @@ -59,9 +59,9 @@ #include #include +#include #include #include -#include #include #include @@ -80,58 +80,141 @@ *** Types ***/ -/*% reference-counted TCP connection object */ -typedef struct ns_tcpconn { - isc_refcount_t refs; - isc_quota_t *tcpquota; - bool pipelined; -} ns_tcpconn_t; +#define NS_CLIENT_TCP_BUFFER_SIZE (65535 + 2) +#define NS_CLIENT_SEND_BUFFER_SIZE 4096 +#define NS_CLIENT_RECV_BUFFER_SIZE 4096 + +#define CLIENT_NMCTXS 100 +/*%< + * Number of 'mctx pools' for clients. (Should this be configurable?) + * When enabling threads, we use a pool of memory contexts shared by + * client objects, since concurrent access to a shared context would cause + * heavy contentions. The above constant is expected to be enough for + * completely avoiding contentions among threads for an authoritative-only + * server. + */ + +#define CLIENT_NTASKS 100 +/*%< + * Number of tasks to be used by clients - those are used only when recursing + */ + +/*! + * Client object states. Ordering is significant: higher-numbered + * states are generally "more active", meaning that the client can + * have more dynamically allocated data, outstanding events, etc. + * In the list below, any such properties listed for state N + * also apply to any state > N. + */ + +typedef enum { + NS_CLIENTSTATE_FREED = 0, + /*%< + * The client object no longer exists. + */ + + NS_CLIENTSTATE_INACTIVE = 1, + /*%< + * The client object exists and has a task and timer. + * Its "query" struct and sendbuf are initialized. + * It has a message and OPT, both in the reset state. + */ + + NS_CLIENTSTATE_READY = 2, + /*%< + * The client object is either a TCP or a UDP one, and + * it is associated with a network interface. 
It is on the + * client manager's list of active clients. + * + * If it is a TCP client object, it has a TCP listener socket + * and an outstanding TCP listen request. + * + * If it is a UDP client object, it has a UDP listener socket + * and an outstanding UDP receive request. + */ + + NS_CLIENTSTATE_WORKING = 3, + /*%< + * The client object has received a request and is working + * on it. It has a view, and it may have any of a non-reset OPT, + * recursion quota, and an outstanding write request. + */ + + NS_CLIENTSTATE_RECURSING = 4, + /*%< + * The client object is recursing. It will be on the + * 'recursing' list. + */ + + NS_CLIENTSTATE_MAX = 5 + /*%< + * Sentinel value used to indicate "no state". + */ +} ns_clientstate_t; + +typedef ISC_LIST(ns_client_t) client_list_t; + +/*% nameserver client manager structure */ +struct ns_clientmgr { + /* Unlocked. */ + unsigned int magic; + + isc_mem_t * mctx; + ns_server_t * sctx; + isc_taskmgr_t * taskmgr; + isc_timermgr_t * timermgr; + isc_task_t * excl; + isc_refcount_t references; + + /* Attached by clients, needed for e.g. recursion */ + isc_task_t ** taskpool; + + ns_interface_t *interface; + + /* Lock covers manager state. */ + isc_mutex_t lock; + bool exiting; + + /* Lock covers the recursing list */ + isc_mutex_t reclock; + client_list_t recursing; /*%< Recursing clients */ + +#if CLIENT_NMCTXS > 0 + /*%< mctx pool for clients. */ + unsigned int nextmctx; + isc_mem_t * mctxpool[CLIENT_NMCTXS]; +#endif +}; /*% nameserver client structure */ struct ns_client { unsigned int magic; isc_mem_t *mctx; + bool allocated; /* Do we need to free it? */ ns_server_t *sctx; ns_clientmgr_t *manager; - int state; - int newstate; + ns_clientstate_t state; int naccepts; int nreads; int nsends; int nrecvs; int nupdates; int nctls; - isc_refcount_t references; - bool tcpactive; - bool needshutdown; /* - * Used by clienttest to get - * the client to go from - * inactive to free state - * by shutting down the - * client's task. 
- */ + bool shuttingdown; unsigned int attributes; isc_task_t *task; dns_view_t *view; dns_dispatch_t *dispatch; - isc_socket_t *udpsocket; - isc_socket_t *tcplistener; - isc_socket_t *tcpsocket; + isc_nmhandle_t *handle; unsigned char *tcpbuf; - dns_tcpmsg_t tcpmsg; - bool tcpmsg_valid; - isc_timer_t *timer; - isc_timer_t *delaytimer; - bool timerset; dns_message_t *message; - isc_socketevent_t *sendevent; - isc_socketevent_t *recvevent; unsigned char *recvbuf; + unsigned char sendbuf[NS_CLIENT_SEND_BUFFER_SIZE]; dns_rdataset_t *opt; uint16_t udpsize; uint16_t extflags; int16_t ednsversion; /* -1 noedns */ - void (*next)(ns_client_t *); + void (*cleanup)(ns_client_t *); void (*shutdown)(void *arg, isc_result_t result); void *shutdown_arg; ns_query_t query; @@ -141,9 +224,7 @@ struct ns_client { dns_name_t signername; /*%< [T]SIG key name */ dns_name_t *signer; /*%< NULL if not valid sig */ bool mortal; /*%< Die after handling request */ - ns_tcpconn_t *tcpconn; isc_quota_t *recursionquota; - ns_interface_t *interface; isc_sockaddr_t peeraddr; bool peeraddr_valid; @@ -154,7 +235,6 @@ struct ns_client { struct in6_pktinfo pktinfo; isc_dscp_t dscp; - isc_event_t ctlevent; /*% * Information about recent FORMERR response(s), for * FORMERR loop avoidance. 
This is separate for each @@ -170,9 +250,7 @@ struct ns_client { /*% Callback function to send a response when unit testing */ void (*sendcb)(isc_buffer_t *buf); - ISC_LINK(ns_client_t) link; ISC_LINK(ns_client_t) rlink; - ISC_QLINK(ns_client_t) ilink; unsigned char cookie[8]; uint32_t expire; unsigned char *keytag; @@ -187,9 +265,6 @@ struct ns_client { int32_t rcode_override; }; -typedef ISC_QUEUE(ns_client_t) client_queue_t; -typedef ISC_LIST(ns_client_t) client_list_t; - #define NS_CLIENT_MAGIC ISC_MAGIC('N','S','C','c') #define NS_CLIENT_VALID(c) ISC_MAGIC_VALID(c, NS_CLIENT_MAGIC) @@ -256,10 +331,10 @@ ns_client_error(ns_client_t *client, isc_result_t result); */ void -ns_client_next(ns_client_t *client, isc_result_t result); +ns_client_drop(ns_client_t *client, isc_result_t result); /*%< - * Finish processing the current client request, - * return no response to the client. + * Log the reason the current client request has failed; no response + * will be sent. */ bool @@ -268,18 +343,6 @@ ns_client_shuttingdown(ns_client_t *client); * Return true iff the client is currently shutting down. */ -void -ns_client_attach(ns_client_t *source, ns_client_t **target); -/*%< - * Attach '*targetp' to 'source'. - */ - -void -ns_client_detach(ns_client_t **clientp); -/*%< - * Detach '*clientp' from its client. - */ - isc_result_t ns_client_replace(ns_client_t *client); /*%< @@ -296,7 +359,8 @@ ns_client_settimeout(ns_client_t *client, unsigned int seconds); isc_result_t ns_clientmgr_create(isc_mem_t *mctx, ns_server_t *sctx, isc_taskmgr_t *taskmgr, - isc_timermgr_t *timermgr, ns_clientmgr_t **managerp); + isc_timermgr_t *timermgr, ns_interface_t *ifp, + ns_clientmgr_t **managerp); /*%< * Create a client manager. */ @@ -308,15 +372,6 @@ ns_clientmgr_destroy(ns_clientmgr_t **managerp); * managed by it. 
*/ -isc_result_t -ns_clientmgr_createclients(ns_clientmgr_t *manager, unsigned int n, - ns_interface_t *ifp, bool tcp); -/*%< - * Create up to 'n' clients listening on interface 'ifp'. - * If 'tcp' is true, the clients will listen for TCP connections, - * otherwise for UDP requests. - */ - isc_sockaddr_t * ns_client_getsockaddr(ns_client_t *client); /*%< @@ -427,16 +482,14 @@ isc_result_t ns_client_addopt(ns_client_t *client, dns_message_t *message, dns_rdataset_t **opt); -isc_result_t -ns__clientmgr_getclient(ns_clientmgr_t *manager, ns_interface_t *ifp, - bool tcp, ns_client_t **clientp); /* * Get a client object from the inactive queue, or create one, as needed. * (Not intended for use outside this module and associated tests.) */ void -ns__client_request(isc_task_t *task, isc_event_t *event); +ns__client_request(isc_nmhandle_t *handle, isc_region_t *region, void *arg); + /* * Handle client requests. * (Not intended for use outside this module and associated tests.) @@ -508,4 +561,24 @@ ns_client_findversion(ns_client_t *client, dns_db_t *db); * allocated by ns_client_newdbversion(). */ +isc_result_t +ns__client_setup(ns_client_t *client, ns_clientmgr_t *manager, bool new); +/*%< + * Perform initial setup of an allocated client. + */ + +void +ns__client_reset_cb(void *client0); +/*%< + * Reset the client object so that it can be reused. + */ + +void +ns__client_put_cb(void *client0); +/*%< + * Free all resources allocated to this client object, so that + * it can be freed. + */ + + #endif /* NS_CLIENT_H */ diff --git a/lib/ns/include/ns/interfacemgr.h b/lib/ns/include/ns/interfacemgr.h index 82d76f9e41..0212460cc4 100644 --- a/lib/ns/include/ns/interfacemgr.h +++ b/lib/ns/include/ns/interfacemgr.h @@ -44,6 +44,7 @@ #include #include +#include #include #include @@ -66,16 +67,18 @@ /*% The nameserver interface structure */ struct ns_interface { unsigned int magic; /*%< Magic number. */ - ns_interfacemgr_t * mgr; /*%< Interface manager. 
*/ + ns_interfacemgr_t *mgr; /*%< Interface manager. */ isc_mutex_t lock; isc_refcount_t references; unsigned int generation; /*%< Generation number. */ isc_sockaddr_t addr; /*%< Address and port. */ - unsigned int flags; /*%< Interface characteristics */ + unsigned int flags; /*%< Interface flags */ char name[32]; /*%< Null terminated. */ - dns_dispatch_t * udpdispatch[MAX_UDP_DISPATCH]; + dns_dispatch_t *udpdispatch[MAX_UDP_DISPATCH]; /*%< UDP dispatchers. */ - isc_socket_t * tcpsocket; /*%< TCP socket. */ + isc_socket_t *tcpsocket; /*%< TCP socket. */ + isc_nmsocket_t *udplistensocket; + isc_nmsocket_t *tcplistensocket; isc_dscp_t dscp; /*%< "listen-on" DSCP value */ isc_refcount_t ntcpaccepting; /*%< Number of clients ready to accept new @@ -86,7 +89,7 @@ struct ns_interface { (whether accepting or connected) */ int nudpdispatch; /*%< Number of UDP dispatches */ - ns_clientmgr_t * clientmgr; /*%< Client manager. */ + ns_clientmgr_t *clientmgr; /*%< Client manager. */ ISC_LINK(ns_interface_t) link; }; @@ -95,15 +98,11 @@ struct ns_interface { ***/ isc_result_t -ns_interfacemgr_create(isc_mem_t *mctx, - ns_server_t *sctx, - isc_taskmgr_t *taskmgr, - isc_timermgr_t *timermgr, - isc_socketmgr_t *socketmgr, - dns_dispatchmgr_t *dispatchmgr, - isc_task_t *task, - unsigned int udpdisp, - dns_geoip_databases_t *geoip, +ns_interfacemgr_create(isc_mem_t *mctx, ns_server_t *sctx, + isc_taskmgr_t *taskmgr, isc_timermgr_t *timermgr, + isc_socketmgr_t *socketmgr, isc_nm_t *nm, + dns_dispatchmgr_t *dispatchmgr, isc_task_t *task, + unsigned int udpdisp, dns_geoip_databases_t *geoip, ns_interfacemgr_t **mgrp); /*%< * Create a new interface manager. 
diff --git a/lib/ns/include/ns/server.h b/lib/ns/include/ns/server.h index c4a0f71ecd..fa27bdd446 100644 --- a/lib/ns/include/ns/server.h +++ b/lib/ns/include/ns/server.h @@ -36,7 +36,6 @@ #define NS_SERVER_NOAA 0x00000002U /*%< -T noaa */ #define NS_SERVER_NOSOA 0x00000004U /*%< -T nosoa */ #define NS_SERVER_NONEAREST 0x00000008U /*%< -T nonearest */ -#define NS_SERVER_CLIENTTEST 0x00000010U /*%< -T clienttest */ #define NS_SERVER_NOEDNS 0x00000020U /*%< -T noedns */ #define NS_SERVER_DROPEDNS 0x00000040U /*%< -T dropedns */ #define NS_SERVER_NOTCP 0x00000080U /*%< -T notcp */ diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c index a473d50eea..0d932857bb 100644 --- a/lib/ns/interfacemgr.c +++ b/lib/ns/interfacemgr.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -72,6 +73,7 @@ struct ns_interfacemgr { isc_task_t * excl; /*%< Exclusive task. */ isc_timermgr_t * timermgr; /*%< Timer manager. */ isc_socketmgr_t * socketmgr; /*%< Socket manager. */ + isc_nm_t * nm; /*%< Net manager. */ dns_dispatchmgr_t * dispatchmgr; unsigned int generation; /*%< Current generation no. */ ns_listenlist_t * listenon4; @@ -172,6 +174,7 @@ ns_interfacemgr_create(isc_mem_t *mctx, isc_taskmgr_t *taskmgr, isc_timermgr_t *timermgr, isc_socketmgr_t *socketmgr, + isc_nm_t *nm, dns_dispatchmgr_t *dispatchmgr, isc_task_t *task, unsigned int udpdisp, @@ -208,6 +211,7 @@ ns_interfacemgr_create(isc_mem_t *mctx, mgr->taskmgr = taskmgr; mgr->timermgr = timermgr; mgr->socketmgr = socketmgr; + mgr->nm = nm; mgr->dispatchmgr = dispatchmgr; mgr->generation = 1; mgr->listenon4 = NULL; @@ -249,8 +253,9 @@ ns_interfacemgr_create(isc_mem_t *mctx, } mgr->task = NULL; - if (mgr->route != NULL) + if (mgr->route != NULL) { isc_task_attach(task, &mgr->task); + } isc_refcount_init(&mgr->references, (mgr->route != NULL) ? 
2 : 1); #else isc_refcount_init(&mgr->references, 1); @@ -379,30 +384,19 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, REQUIRE(NS_INTERFACEMGR_VALID(mgr)); ifp = isc_mem_get(mgr->mctx, sizeof(*ifp)); + *ifp = (ns_interface_t){ + .generation = mgr->generation, + .addr = *addr, + .dscp = -1 + }; - ifp->mgr = NULL; - ifp->generation = mgr->generation; - ifp->addr = *addr; - ifp->flags = 0; strlcpy(ifp->name, name, sizeof(ifp->name)); - ifp->clientmgr = NULL; isc_mutex_init(&ifp->lock); - result = ns_clientmgr_create(mgr->mctx, mgr->sctx, - mgr->taskmgr, mgr->timermgr, - &ifp->clientmgr); - if (result != ISC_R_SUCCESS) { - isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, - "ns_clientmgr_create() failed: %s", - isc_result_totext(result)); - goto clientmgr_create_failure; - } - - for (disp = 0; disp < MAX_UDP_DISPATCH; disp++) + for (disp = 0; disp < MAX_UDP_DISPATCH; disp++) { ifp->udpdispatch[disp] = NULL; - - ifp->tcpsocket = NULL; + } /* * Create a single TCP client object. 
It will replace itself @@ -413,10 +407,6 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, isc_refcount_init(&ifp->ntcpaccepting, 0); isc_refcount_init(&ifp->ntcpactive, 0); - ifp->nudpdispatch = 0; - - ifp->dscp = -1; - ISC_LINK_INIT(ifp, link); ns_interfacemgr_attach(mgr, &ifp->mgr); @@ -424,11 +414,22 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, isc_refcount_init(&ifp->references, 1); ifp->magic = IFACE_MAGIC; + + result = ns_clientmgr_create(mgr->mctx, mgr->sctx, + mgr->taskmgr, mgr->timermgr, ifp, + &ifp->clientmgr); + if (result != ISC_R_SUCCESS) { + isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, + "ns_clientmgr_create() failed: %s", + isc_result_totext(result)); + goto failure; + } + *ifpret = ifp; return (ISC_R_SUCCESS); - clientmgr_create_failure: + failure: isc_mutex_destroy(&ifp->lock); ifp->magic = 0; @@ -440,127 +441,43 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, static isc_result_t ns_interface_listenudp(ns_interface_t *ifp) { isc_result_t result; - unsigned int attrs; - unsigned int attrmask; - int disp, i; - attrs = 0; - attrs |= DNS_DISPATCHATTR_UDP; - if (isc_sockaddr_pf(&ifp->addr) == AF_INET) - attrs |= DNS_DISPATCHATTR_IPV4; - else - attrs |= DNS_DISPATCHATTR_IPV6; - attrs |= DNS_DISPATCHATTR_NOLISTEN; - attrs |= DNS_DISPATCHATTR_CANREUSE; - attrmask = 0; - attrmask |= DNS_DISPATCHATTR_UDP | DNS_DISPATCHATTR_TCP; - attrmask |= DNS_DISPATCHATTR_IPV4 | DNS_DISPATCHATTR_IPV6; - - ifp->nudpdispatch = ISC_MIN(ifp->mgr->udpdisp, MAX_UDP_DISPATCH); - for (disp = 0; disp < ifp->nudpdispatch; disp++) { - result = dns_dispatch_getudp_dup(ifp->mgr->dispatchmgr, - ifp->mgr->socketmgr, - ifp->mgr->taskmgr, &ifp->addr, - 4096, UDPBUFFERS, - 32768, 8219, 8237, - attrs, attrmask, - &ifp->udpdispatch[disp], - disp == 0 - ? 
NULL - : ifp->udpdispatch[0]); - if (result != ISC_R_SUCCESS) { - isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, - "could not listen on UDP socket: %s", - isc_result_totext(result)); - goto udp_dispatch_failure; - } - - } - - result = ns_clientmgr_createclients(ifp->clientmgr, ifp->nudpdispatch, - ifp, false); - if (result != ISC_R_SUCCESS) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "UDP ns_clientmgr_createclients(): %s", - isc_result_totext(result)); - goto addtodispatch_failure; - } - - return (ISC_R_SUCCESS); - - addtodispatch_failure: - for (i = disp - 1; i >= 0; i--) { - dns_dispatch_changeattributes(ifp->udpdispatch[i], 0, - DNS_DISPATCHATTR_NOLISTEN); - dns_dispatch_detach(&(ifp->udpdispatch[i])); - } - ifp->nudpdispatch = 0; - - udp_dispatch_failure: + /* Reserve space for an ns_client_t with the netmgr handle */ + result = isc_nm_listenudp(ifp->mgr->nm, + (isc_nmiface_t *) &ifp->addr, + ns__client_request, ifp, + sizeof(ns_client_t), + &ifp->udplistensocket); return (result); } static isc_result_t -ns_interface_accepttcp(ns_interface_t *ifp) { +ns_interface_listentcp(ns_interface_t *ifp) { isc_result_t result; - /* - * Open a TCP socket. 
- */ - result = isc_socket_create(ifp->mgr->socketmgr, - isc_sockaddr_pf(&ifp->addr), - isc_sockettype_tcp, - &ifp->tcpsocket); + /* Reserve space for an ns_client_t with the netmgr handle */ + result = isc_nm_listentcpdns(ifp->mgr->nm, + (isc_nmiface_t *) &ifp->addr, + ns__client_request, ifp, + sizeof(ns_client_t), + &ifp->mgr->sctx->tcpquota, + &ifp->tcplistensocket); if (result != ISC_R_SUCCESS) { isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, "creating TCP socket: %s", isc_result_totext(result)); - goto tcp_socket_failure; } - isc_socket_setname(ifp->tcpsocket, "dispatcher", NULL); + +#if 0 #ifndef ISC_ALLOW_MAPPED isc_socket_ipv6only(ifp->tcpsocket, true); #endif - result = isc_socket_bind(ifp->tcpsocket, &ifp->addr, - ISC_SOCKET_REUSEADDRESS); - if (result != ISC_R_SUCCESS) { - isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, - "binding TCP socket: %s", - isc_result_totext(result)); - goto tcp_bind_failure; - } if (ifp->dscp != -1) isc_socket_dscp(ifp->tcpsocket, ifp->dscp); - result = isc_socket_listen(ifp->tcpsocket, ifp->mgr->backlog); - if (result != ISC_R_SUCCESS) { - isc_log_write(IFMGR_COMMON_LOGARGS, ISC_LOG_ERROR, - "listening on TCP socket: %s", - isc_result_totext(result)); - goto tcp_listen_failure; - } - - /* - * If/when there a multiple filters listen to the - * result. 
- */ (void)isc_socket_filter(ifp->tcpsocket, "dataready"); - - result = ns_clientmgr_createclients(ifp->clientmgr, 1, ifp, true); - if (result != ISC_R_SUCCESS) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "TCP ns_clientmgr_createclients(): %s", - isc_result_totext(result)); - goto accepttcp_failure; - } - return (ISC_R_SUCCESS); - - accepttcp_failure: - tcp_listen_failure: - tcp_bind_failure: - isc_socket_detach(&ifp->tcpsocket); - tcp_socket_failure: +#endif return (result); } @@ -591,7 +508,7 @@ ns_interface_setup(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, if (((mgr->sctx->options & NS_SERVER_NOTCP) == 0) && accept_tcp == true) { - result = ns_interface_accepttcp(ifp); + result = ns_interface_listentcp(ifp); if (result != ISC_R_SUCCESS) { if ((result == ISC_R_ADDRINUSE) && (addr_in_use != NULL)) @@ -617,8 +534,17 @@ ns_interface_setup(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, void ns_interface_shutdown(ns_interface_t *ifp) { - if (ifp->clientmgr != NULL) + if (ifp->udplistensocket != NULL) { + isc_nm_udp_stoplistening(ifp->udplistensocket); + isc_nmsocket_detach(&ifp->udplistensocket); + } + if (ifp->tcplistensocket != NULL) { + isc_nm_tcpdns_stoplistening(ifp->tcplistensocket); + isc_nmsocket_detach(&ifp->tcplistensocket); + } + if (ifp->clientmgr != NULL) { ns_clientmgr_destroy(&ifp->clientmgr); + } } static void diff --git a/lib/ns/notify.c b/lib/ns/notify.c index 0eee750ae3..3c0ad5db17 100644 --- a/lib/ns/notify.c +++ b/lib/ns/notify.c @@ -52,7 +52,8 @@ respond(ns_client_t *client, isc_result_t result) { if (msg_result != ISC_R_SUCCESS) msg_result = dns_message_reply(message, false); if (msg_result != ISC_R_SUCCESS) { - ns_client_next(client, msg_result); + ns_client_drop(client, msg_result); + isc_nmhandle_unref(client->handle); return; } message->rcode = rcode; @@ -60,7 +61,9 @@ respond(ns_client_t *client, isc_result_t result) { message->flags |= DNS_MESSAGEFLAG_AA; else message->flags &= ~DNS_MESSAGEFLAG_AA; + ns_client_send(client); + 
isc_nmhandle_unref(client->handle); } void diff --git a/lib/ns/query.c b/lib/ns/query.c index 4e43b1d92c..7adf04d96f 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -560,6 +560,7 @@ query_send(ns_client_t *client) { inc_stats(client, counter); ns_client_send(client); + isc_nmhandle_unref(client->handle); } static void @@ -585,17 +586,20 @@ query_error(ns_client_t *client, isc_result_t result, int line) { log_queryerror(client, result, line, loglevel); ns_client_error(client, result); + isc_nmhandle_unref(client->handle); } static void query_next(ns_client_t *client, isc_result_t result) { - if (result == DNS_R_DUPLICATE) + if (result == DNS_R_DUPLICATE) { inc_stats(client, ns_statscounter_duplicate); - else if (result == DNS_R_DROP) + } else if (result == DNS_R_DROP) { inc_stats(client, ns_statscounter_dropped); - else + } else { inc_stats(client, ns_statscounter_failure); - ns_client_next(client, result); + } + ns_client_drop(client, result); + isc_nmhandle_unref(client->handle); } static inline void @@ -655,7 +659,8 @@ query_reset(ns_client_t *client, bool everything) { */ for (dbversion = ISC_LIST_HEAD(client->query.activeversions); dbversion != NULL; - dbversion = dbversion_next) { + dbversion = dbversion_next) + { dbversion_next = ISC_LIST_NEXT(dbversion, link); dns_db_closeversion(dbversion->db, &dbversion->version, false); @@ -742,7 +747,7 @@ query_reset(ns_client_t *client, bool everything) { } static void -query_next_callback(ns_client_t *client) { +query_cleanup(ns_client_t *client) { query_reset(client, false); } @@ -2423,6 +2428,8 @@ free_devent(ns_client_t *client, isc_event_t **eventp, REQUIRE((void*)(*eventp) == (void *)(*deventp)); + CTRACE(ISC_LOG_DEBUG(3), "free_devent"); + if (devent->fetch != NULL) { dns_resolver_destroyfetch(&devent->fetch); } @@ -2438,12 +2445,14 @@ free_devent(ns_client_t *client, isc_event_t **eventp, if (devent->sigrdataset != NULL) { ns_client_putrdataset(client, &devent->sigrdataset); } + /* * If the two pointers are 
the same then leave the setting of * (*deventp) to NULL to isc_event_free. */ - if ((void *)eventp != (void *)deventp) + if ((void *)eventp != (void *)deventp) { (*deventp) = NULL; + } isc_event_free(eventp); } @@ -2459,14 +2468,17 @@ prefetch_done(isc_task_t *task, isc_event_t *event) { REQUIRE(NS_CLIENT_VALID(client)); REQUIRE(task == client->task); + CTRACE(ISC_LOG_DEBUG(3), "prefetch_done"); + LOCK(&client->query.fetchlock); if (client->query.prefetch != NULL) { INSIST(devent->fetch == client->query.prefetch); client->query.prefetch = NULL; } UNLOCK(&client->query.fetchlock); + free_devent(client, &event, &devent); - ns_client_detach(&client); + isc_nmhandle_unref(client->handle); } static void @@ -2476,40 +2488,38 @@ query_prefetch(ns_client_t *client, dns_name_t *qname, isc_result_t result; isc_sockaddr_t *peeraddr; dns_rdataset_t *tmprdataset; - ns_client_t *dummy = NULL; unsigned int options; + CTRACE(ISC_LOG_DEBUG(3), "query_prefetch"); + if (client->query.prefetch != NULL || client->view->prefetch_trigger == 0U || rdataset->ttl > client->view->prefetch_trigger || (rdataset->attributes & DNS_RDATASETATTR_PREFETCH) == 0) + { return; + } if (client->recursionquota == NULL) { result = isc_quota_attach(&client->sctx->recursionquota, &client->recursionquota); - if (result == ISC_R_SUCCESS || result == ISC_R_SOFTQUOTA) { - ns_stats_increment(client->sctx->nsstats, - ns_statscounter_recursclients); - } - if (result == ISC_R_SUCCESS && !client->mortal && - !TCP(client)) - { - result = ns_client_replace(client); - } if (result != ISC_R_SUCCESS) { return; } } tmprdataset = ns_client_newrdataset(client); - if (tmprdataset == NULL) + if (tmprdataset == NULL) { return; - if (!TCP(client)) + } + + if (!TCP(client)) { peeraddr = &client->peeraddr; - else + } else { peeraddr = NULL; - ns_client_attach(client, &dummy); + } + + isc_nmhandle_ref(client->handle); options = client->query.fetchoptions | DNS_FETCHOPT_PREFETCH; result = 
dns_resolver_createfetch(client->view->resolver, qname, rdataset->type, NULL, NULL, @@ -2520,8 +2530,9 @@ query_prefetch(ns_client_t *client, dns_name_t *qname, &client->query.prefetch); if (result != ISC_R_SUCCESS) { ns_client_putrdataset(client, &tmprdataset); - ns_client_detach(&dummy); + isc_nmhandle_unref(client->handle); } + dns_rdataset_clearprefetch(rdataset); ns_stats_increment(client->sctx->nsstats, ns_statscounter_prefetch); @@ -2692,38 +2703,34 @@ query_rpzfetch(ns_client_t *client, dns_name_t *qname, dns_rdatatype_t type) { isc_result_t result; isc_sockaddr_t *peeraddr; dns_rdataset_t *tmprdataset; - ns_client_t *dummy = NULL; unsigned int options; + CTRACE(ISC_LOG_DEBUG(3), "query_rpzfetch"); + if (client->query.prefetch != NULL) return; if (client->recursionquota == NULL) { result = isc_quota_attach(&client->sctx->recursionquota, &client->recursionquota); - if (result == ISC_R_SUCCESS || result == ISC_R_SOFTQUOTA) { - ns_stats_increment(client->sctx->nsstats, - ns_statscounter_recursclients); - } - if (result == ISC_R_SUCCESS && !client->mortal && - !TCP(client)) - { - result = ns_client_replace(client); - } if (result != ISC_R_SUCCESS) { return; } } tmprdataset = ns_client_newrdataset(client); - if (tmprdataset == NULL) + if (tmprdataset == NULL) { return; - if (!TCP(client)) + } + + if (!TCP(client)) { peeraddr = &client->peeraddr; - else + } else { peeraddr = NULL; - ns_client_attach(client, &dummy); + } + options = client->query.fetchoptions; + isc_nmhandle_ref(client->handle); result = dns_resolver_createfetch(client->view->resolver, qname, type, NULL, NULL, NULL, peeraddr, client->message->id, options, 0, @@ -2732,7 +2739,7 @@ query_rpzfetch(ns_client_t *client, dns_name_t *qname, dns_rdatatype_t type) { &client->query.prefetch); if (result != ISC_R_SUCCESS) { ns_client_putrdataset(client, &tmprdataset); - ns_client_detach(&dummy); + isc_nmhandle_unref(client->handle); } } @@ -4644,7 +4651,8 @@ dns64_aaaaok(ns_client_t *client, dns_rdataset_t 
*rdataset, dns_rdataset_t *sigrdataset) { isc_netaddr_t netaddr; - dns_aclenv_t *env = ns_interfacemgr_getaclenv(client->interface->mgr); + dns_aclenv_t *env = + ns_interfacemgr_getaclenv(client->manager->interface->mgr); dns_dns64_t *dns64 = ISC_LIST_HEAD(client->view->dns64); unsigned int flags = 0; unsigned int i, count; @@ -5001,6 +5009,7 @@ qctx_init(ns_client_t *client, dns_fetchevent_t *event, /* Set this first so CCTRACE will work */ qctx->client = client; + dns_view_attach(client->view, &qctx->view); CCTRACE(ISC_LOG_DEBUG(3), "qctx_init"); @@ -5078,9 +5087,6 @@ qctx_destroy(query_ctx_t *qctx) { CALL_HOOK_NORETURN(NS_QUERY_QCTX_DESTROYED, qctx); dns_view_detach(&qctx->view); - if (qctx->detach_client) { - ns_client_detach(&qctx->client); - } } /*% @@ -5583,6 +5589,8 @@ fetch_callback(isc_task_t *task, isc_event_t *event) { REQUIRE(task == client->task); REQUIRE(RECURSING(client)); + CTRACE(ISC_LOG_DEBUG(3), "fetch_callback"); + LOCK(&client->query.fetchlock); if (client->query.fetch != NULL) { /* @@ -5605,9 +5613,28 @@ fetch_callback(isc_task_t *task, isc_event_t *event) { UNLOCK(&client->query.fetchlock); INSIST(client->query.fetch == NULL); - client->query.attributes &= ~NS_QUERYATTR_RECURSING; SAVE(fetch, devent->fetch); + /* + * We're done recursing, detach from quota and unlink from + * the manager's recursing-clients list. + */ + + if (client->recursionquota != NULL) { + isc_quota_detach(&client->recursionquota); + ns_stats_decrement(client->sctx->nsstats, + ns_statscounter_recursclients); + } + + LOCK(&client->manager->reclock); + if (ISC_LINK_LINKED(client, rlink)) { + ISC_LIST_UNLINK(client->manager->recursing, client, rlink); + } + UNLOCK(&client->manager->reclock); + + client->query.attributes &= ~NS_QUERYATTR_RECURSING; + client->state = NS_CLIENTSTATE_WORKING; + /* * If this client is shutting down, or this transaction * has timed out, do not resume the find. 
@@ -5621,10 +5648,6 @@ fetch_callback(isc_task_t *task, isc_event_t *event) { } else { query_next(client, ISC_R_CANCELED); } - /* - * This may destroy the client. - */ - ns_client_detach(&client); } else { query_ctx_t qctx; @@ -5654,6 +5677,7 @@ fetch_callback(isc_task_t *task, isc_event_t *event) { } dns_resolver_destroyfetch(&fetch); + isc_nmhandle_unref(client->handle); } /*% @@ -5733,8 +5757,9 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname, recparam_update(&client->query.recparam, qtype, qname, qdomain); - if (!resuming) + if (!resuming) { inc_stats(client, ns_statscounter_recursion); + } /* * We are about to recurse, which means that this client will @@ -5793,22 +5818,10 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname, } ns_client_killoldestquery(client); } - if (result == ISC_R_SUCCESS && !client->mortal && - !TCP(client)) { - result = ns_client_replace(client); - if (result != ISC_R_SUCCESS) { - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_QUERY, - ISC_LOG_WARNING, - "ns_client_replace() failed: %s", - isc_result_totext(result)); - isc_quota_detach(&client->recursionquota); - ns_stats_decrement(client->sctx->nsstats, - ns_statscounter_recursclients); - } - } - if (result != ISC_R_SUCCESS) + if (result != ISC_R_SUCCESS) { return (result); + } + ns_client_recursing(client); } @@ -5841,6 +5854,7 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname, peeraddr = &client->peeraddr; } + isc_nmhandle_ref(client->handle); result = dns_resolver_createfetch(client->view->resolver, qname, qtype, qdomain, nameservers, NULL, peeraddr, client->message->id, @@ -5849,6 +5863,7 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname, client, rdataset, sigrdataset, &client->query.fetch); if (result != ISC_R_SUCCESS) { + isc_nmhandle_unref(client->handle); ns_client_putrdataset(client, &rdataset); if (sigrdataset != NULL) { 
ns_client_putrdataset(client, &sigrdataset); @@ -5879,6 +5894,8 @@ query_resume(query_ctx_t *qctx) { char tbuf[DNS_RDATATYPE_FORMATSIZE]; #endif + CCTRACE(ISC_LOG_DEBUG(3), "query_resume"); + CALL_HOOK(NS_QUERY_RESUME_BEGIN, qctx); qctx->want_restart = false; @@ -6320,6 +6337,8 @@ static isc_result_t query_checkrpz(query_ctx_t *qctx, isc_result_t result) { isc_result_t rresult; + CCTRACE(ISC_LOG_DEBUG(3), "query_checkrpz"); + rresult = rpz_rewrite(qctx->client, qctx->qtype, result, qctx->resuming, qctx->rdataset, qctx->sigrdataset); @@ -6902,6 +6921,8 @@ query_respond_any(query_ctx_t *qctx) { dns_rdatatype_t onetype = 0; /* type to use for minimal-any */ isc_buffer_t b; + CCTRACE(ISC_LOG_DEBUG(3), "query_respond_any"); + CALL_HOOK(NS_QUERY_RESPOND_ANY_BEGIN, qctx); result = dns_db_allrdatasets(qctx->db, qctx->node, @@ -7123,6 +7144,8 @@ static void query_getexpire(query_ctx_t *qctx) { dns_zone_t *raw = NULL, *mayberaw; + CCTRACE(ISC_LOG_DEBUG(3), "query_getexpire"); + if (qctx->zone == NULL || !qctx->is_zone || qctx->qtype != dns_rdatatype_soa || qctx->client->query.restarts != 0 || @@ -7177,6 +7200,8 @@ query_addanswer(query_ctx_t *qctx) { dns_rdataset_t **sigrdatasetp = NULL; isc_result_t result; + CCTRACE(ISC_LOG_DEBUG(3), "query_addanswer"); + CALL_HOOK(NS_QUERY_ADDANSWER_BEGIN, qctx); if (qctx->dns64) { @@ -7237,6 +7262,8 @@ static isc_result_t query_respond(query_ctx_t *qctx) { isc_result_t result; + CCTRACE(ISC_LOG_DEBUG(3), "query_respond"); + /* * Check to see if the AAAA RRset has non-excluded addresses * in it. If not look for a A RRset. 
@@ -7332,7 +7359,8 @@ query_respond(query_ctx_t *qctx) { static isc_result_t query_dns64(query_ctx_t *qctx) { ns_client_t *client = qctx->client; - dns_aclenv_t *env = ns_interfacemgr_getaclenv(client->interface->mgr); + dns_aclenv_t *env = + ns_interfacemgr_getaclenv(client->manager->interface->mgr); dns_name_t *name, *mname; dns_rdata_t *dns64_rdata; dns_rdata_t rdata = DNS_RDATA_INIT; @@ -7677,6 +7705,8 @@ static isc_result_t query_notfound(query_ctx_t *qctx) { isc_result_t result; + CCTRACE(ISC_LOG_DEBUG(3), "query_notfound"); + CALL_HOOK(NS_QUERY_NOTFOUND_BEGIN, qctx); INSIST(!qctx->is_zone); @@ -7915,6 +7945,8 @@ static isc_result_t query_delegation(query_ctx_t *qctx) { isc_result_t result; + CCTRACE(ISC_LOG_DEBUG(3), "query_delegation"); + CALL_HOOK(NS_QUERY_DELEGATION_BEGIN, qctx); qctx->authoritative = false; @@ -7989,6 +8021,8 @@ query_delegation_recurse(query_ctx_t *qctx) { isc_result_t result; dns_name_t *qname = qctx->client->query.qname; + CCTRACE(ISC_LOG_DEBUG(3), "query_delegation_recurse"); + if (!RECURSIONOK(qctx->client)) { return (ISC_R_COMPLETE); } @@ -8186,6 +8220,8 @@ static isc_result_t query_nodata(query_ctx_t *qctx, isc_result_t res) { isc_result_t result = res; + CCTRACE(ISC_LOG_DEBUG(3), "query_nodata"); + CALL_HOOK(NS_QUERY_NODATA_BEGIN, qctx); #ifdef dns64_bis_return_excluded_addresses @@ -8308,6 +8344,9 @@ query_nodata(query_ctx_t *qctx, isc_result_t res) { isc_result_t query_sign_nodata(query_ctx_t *qctx) { isc_result_t result; + + CCTRACE(ISC_LOG_DEBUG(3), "query_sign_nodata"); + /* * Look for a NSEC3 record if we don't have a NSEC record. 
*/ @@ -8504,6 +8543,8 @@ query_nxdomain(query_ctx_t *qctx, bool empty_wild) { uint32_t ttl; isc_result_t result; + CCTRACE(ISC_LOG_DEBUG(3), "query_nxdomain"); + CALL_HOOK(NS_QUERY_NXDOMAIN_BEGIN, qctx); INSIST(qctx->is_zone || REDIRECT(qctx->client)); @@ -8595,6 +8636,8 @@ static isc_result_t query_redirect(query_ctx_t *qctx) { isc_result_t result; + CCTRACE(ISC_LOG_DEBUG(3), "query_redirect"); + result = redirect(qctx->client, qctx->fname, qctx->rdataset, &qctx->node, &qctx->db, &qctx->version, qctx->type); @@ -8790,6 +8833,8 @@ query_synthwildcard(query_ctx_t *qctx, dns_rdataset_t *rdataset, dns_rdataset_t *cloneset = NULL, *clonesigset = NULL; dns_rdataset_t **sigrdatasetp; + CCTRACE(ISC_LOG_DEBUG(3), "query_synthwildcard"); + /* * We want the answer to be first, so save the * NOQNAME proof's name now or else discard it. @@ -8943,6 +8988,8 @@ query_synthnxdomain(query_ctx_t *qctx, isc_result_t result; dns_rdataset_t *cloneset = NULL, *clonesigset = NULL; + CCTRACE(ISC_LOG_DEBUG(3), "query_synthnxdomain"); + /* * Detemine the correct TTL to use for the SOA and RRSIG */ @@ -9109,6 +9156,8 @@ query_coveringnsec(query_ctx_t *qctx) { isc_result_t result = ISC_R_SUCCESS; unsigned int dboptions = qctx->client->query.dboptions; + CCTRACE(ISC_LOG_DEBUG(3), "query_coveringnsec"); + dns_rdataset_init(&rdataset); dns_rdataset_init(&sigrdataset); @@ -9367,6 +9416,8 @@ query_ncache(query_ctx_t *qctx, isc_result_t result) { result == DNS_R_NCACHENXRRSET || result == DNS_R_NXDOMAIN); + CCTRACE(ISC_LOG_DEBUG(3), "query_ncache"); + CALL_HOOK(NS_QUERY_NCACHE_BEGIN, qctx); qctx->authoritative = false; @@ -9402,6 +9453,8 @@ static isc_result_t query_zerottl_refetch(query_ctx_t *qctx) { isc_result_t result; + CCTRACE(ISC_LOG_DEBUG(3), "query_zerottl_refetch"); + if (qctx->is_zone || qctx->resuming || STALE(qctx->rdataset) || qctx->rdataset->ttl != 0 || !RECURSIONOK(qctx->client)) { @@ -9417,8 +9470,7 @@ query_zerottl_refetch(query_ctx_t *qctx) { NULL, NULL, qctx->resuming); if 
(result == ISC_R_SUCCESS) { CALL_HOOK(NS_QUERY_ZEROTTL_RECURSE, qctx); - qctx->client->query.attributes |= - NS_QUERYATTR_RECURSING; + qctx->client->query.attributes |= NS_QUERYATTR_RECURSING; if (qctx->dns64) { qctx->client->query.attributes |= @@ -9450,6 +9502,8 @@ query_cname(query_ctx_t *qctx) { dns_rdata_t rdata = DNS_RDATA_INIT; dns_rdata_cname_t cname; + CCTRACE(ISC_LOG_DEBUG(3), "query_cname"); + CALL_HOOK(NS_QUERY_CNAME_BEGIN, qctx); result = query_zerottl_refetch(qctx); @@ -9559,6 +9613,8 @@ query_dname(query_ctx_t *qctx) { isc_result_t result; unsigned int nlabels; + CCTRACE(ISC_LOG_DEBUG(3), "query_dname"); + CALL_HOOK(NS_QUERY_DNAME_BEGIN, qctx); /* @@ -9774,6 +9830,8 @@ static isc_result_t query_prepresponse(query_ctx_t *qctx) { isc_result_t result; + CCTRACE(ISC_LOG_DEBUG(3), "query_prepresponse"); + CALL_HOOK(NS_QUERY_PREP_RESPONSE_BEGIN, qctx); if (WANTDNSSEC(qctx->client) && @@ -10599,7 +10657,8 @@ static void query_setup_sortlist(query_ctx_t *qctx) { isc_netaddr_t netaddr; ns_client_t *client = qctx->client; - dns_aclenv_t *env = ns_interfacemgr_getaclenv(client->interface->mgr); + dns_aclenv_t *env = + ns_interfacemgr_getaclenv(client->manager->interface->mgr); const void *order_arg = NULL; isc_netaddr_fromsockaddr(&netaddr, &client->peeraddr); @@ -10932,7 +10991,6 @@ ns_query_start(ns_client_t *client) { isc_result_t result; dns_message_t *message; dns_rdataset_t *rdataset; - ns_client_t *qclient; dns_rdatatype_t qtype; unsigned int saved_extflags; unsigned int saved_flags; @@ -10945,26 +11003,10 @@ ns_query_start(ns_client_t *client) { CTRACE(ISC_LOG_DEBUG(3), "ns_query_start"); - /* - * Test only. 
- */ - if (((client->sctx->options & NS_SERVER_CLIENTTEST) != 0) && - !TCP(client)) - { - result = ns_client_replace(client); - if (result == ISC_R_SHUTTINGDOWN) { - ns_client_next(client, result); - return; - } else if (result != ISC_R_SUCCESS) { - query_error(client, result, __LINE__); - return; - } - } - /* * Ensure that appropriate cleanups occur. */ - client->next = query_next_callback; + client->cleanup = query_cleanup; if ((message->flags & DNS_MESSAGEFLAG_RD) != 0) client->query.attributes |= NS_QUERYATTR_WANTRECURSION; @@ -11035,8 +11077,9 @@ ns_query_start(ns_client_t *client) { * section. */ query_error(client, DNS_R_FORMERR, __LINE__); - } else + } else { query_error(client, result, __LINE__); + } return; } @@ -11069,10 +11112,11 @@ ns_query_start(ns_client_t *client) { result = dns_tkey_processquery(client->message, client->sctx->tkeyctx, client->view->dynamickeys); - if (result == ISC_R_SUCCESS) + if (result == ISC_R_SUCCESS) { query_send(client); - else + } else { query_error(client, result, __LINE__); + } return; default: /* TSIG, etc. 
*/ query_error(client, DNS_R_FORMERR, __LINE__); @@ -11118,8 +11162,9 @@ ns_query_start(ns_client_t *client) { { client->query.dboptions |= DNS_DBFIND_PENDINGOK; client->query.fetchoptions |= DNS_FETCHOPT_NOVALIDATE; - } else if (!client->view->enablevalidation) + } else if (!client->view->enablevalidation) { client->query.fetchoptions |= DNS_FETCHOPT_NOVALIDATE; + } if (client->view->qminimization) { client->query.fetchoptions |= DNS_FETCHOPT_QMINIMIZE | @@ -11172,7 +11217,5 @@ ns_query_start(ns_client_t *client) { if (WANTDNSSEC(client) || WANTAD(client)) message->flags |= DNS_MESSAGEFLAG_AD; - qclient = NULL; - ns_client_attach(client, &qclient); - (void)query_setup(qclient, qtype); + (void)query_setup(client, qtype); } diff --git a/lib/ns/tests/Makefile.in b/lib/ns/tests/Makefile.in index 8b952a6375..0ba61773ff 100644 --- a/lib/ns/tests/Makefile.in +++ b/lib/ns/tests/Makefile.in @@ -15,6 +15,8 @@ VERSION=@BIND9_VERSION@ @BIND9_MAKE_INCLUDES@ +WRAP_OPTIONS = -Wl,--wrap=isc_nmhandle_unref + CINCLUDES = -I. 
-Iinclude ${NS_INCLUDES} ${DNS_INCLUDES} ${ISC_INCLUDES} \ ${OPENSSL_CFLAGS} \ @CMOCKA_CFLAGS@ @@ -29,6 +31,12 @@ NSDEPLIBS = ../libns.@A@ LIBS = @LIBS@ @CMOCKA_LIBS@ +SO_CFLAGS = @CFLAGS@ @SO_CFLAGS@ +SO_LDFLAGS = @LDFLAGS@ @SO_LDFLAGS@ + +SO_OBJS = wrap.@O@ +SO_SRCS = wrap.c +SO_TARGETS = libwrap.la OBJS = nstest.@O@ SRCS = nstest.c \ @@ -41,29 +49,39 @@ SUBDIRS = TARGETS = listenlist_test@EXEEXT@ \ notify_test@EXEEXT@ \ plugin_test@EXEEXT@ \ - query_test@EXEEXT@ + query_test@EXEEXT@ \ + @SO_TARGETS@ + +LD_WRAP_TESTS=@LD_WRAP_TESTS@ @BIND9_MAKE_RULES@ -listenlist_test@EXEEXT@: listenlist_test.@O@ nstest.@O@ ${NSDEPLIBS} ${ISCDEPLIBS} ${DNSDEPLIBS} - ${LIBTOOL_MODE_LINK} ${PURIFY} ${CC} ${CFLAGS} \ - ${LDFLAGS} -o $@ listenlist_test.@O@ nstest.@O@ \ - ${NSLIBS} ${DNSLIBS} ${ISCLIBS} ${LIBS} +libwrap.la: wrap.@O@ + ${LIBTOOL_MODE_LINK} @SO_LD@ ${SO_LDFLAGS} -Wl,-z,interpose -o $@ wrap.@O@ ${LIBS} -notify_test@EXEEXT@: notify_test.@O@ nstest.@O@ ${NSDEPLIBS} ${ISCDEPLIBS} ${DNSDEPLIBS} +listenlist_test@EXEEXT@: listenlist_test.@O@ nstest.@O@ libwrap.la ${NSDEPLIBS} ${ISCDEPLIBS} ${DNSDEPLIBS} + if test "${LD_WRAP_TESTS}" = true -a -z "${LIBTOOL}"; then WRAP="${WRAP_OPTIONS}"; fi; \ ${LIBTOOL_MODE_LINK} ${PURIFY} ${CC} ${CFLAGS} \ - ${LDFLAGS} -o $@ notify_test.@O@ nstest.@O@ \ - ${NSLIBS} ${DNSLIBS} ${ISCLIBS} ${LIBS} + ${LDFLAGS} $${WRAP} -Wl,-rpath=${top_builddir}/lib/ns/tests -o $@ listenlist_test.@O@ nstest.@O@ \ + libwrap.la ${NSLIBS} ${DNSLIBS} ${ISCLIBS} ${LIBS} -plugin_test@EXEEXT@: plugin_test.@O@ nstest.@O@ ${NSDEPLIBS} ${ISCDEPLIBS} ${DNSDEPLIBS} +notify_test@EXEEXT@: notify_test.@O@ nstest.@O@ libwrap.la ${NSDEPLIBS} ${ISCDEPLIBS} ${DNSDEPLIBS} + if test "${LD_WRAP_TESTS}" = true -a -z "${LIBTOOL}"; then WRAP="${WRAP_OPTIONS}"; fi; \ ${LIBTOOL_MODE_LINK} ${PURIFY} ${CC} ${CFLAGS} \ - ${LDFLAGS} -o $@ plugin_test.@O@ nstest.@O@ \ - ${NSLIBS} ${DNSLIBS} ${ISCLIBS} ${LIBS} + ${LDFLAGS} $${WRAP} -Wl,-rpath=${top_builddir}/lib/ns/tests -o $@ notify_test.@O@ 
nstest.@O@ libwrap.la \ + libwrap.la ${NSLIBS} ${DNSLIBS} ${ISCLIBS} ${LIBS} -query_test@EXEEXT@: query_test.@O@ nstest.@O@ ${NSDEPLIBS} ${ISCDEPLIBS} ${DNSDEPLIBS} +plugin_test@EXEEXT@: plugin_test.@O@ nstest.@O@ libwrap.la ${NSDEPLIBS} ${ISCDEPLIBS} ${DNSDEPLIBS} + if test "${LD_WRAP_TESTS}" = true -a -z "${LIBTOOL}"; then WRAP="${WRAP_OPTIONS}"; fi; \ ${LIBTOOL_MODE_LINK} ${PURIFY} ${CC} ${CFLAGS} \ - ${LDFLAGS} -o $@ query_test.@O@ nstest.@O@ \ - ${NSLIBS} ${DNSLIBS} ${ISCLIBS} ${LIBS} + ${LDFLAGS} $${WRAP} -Wl,-rpath=${top_builddir}/lib/ns/tests -o $@ plugin_test.@O@ nstest.@O@ \ + libwrap.la ${NSLIBS} ${DNSLIBS} ${ISCLIBS} ${LIBS} + +query_test@EXEEXT@: query_test.@O@ nstest.@O@ libwrap.la ${NSDEPLIBS} ${ISCDEPLIBS} ${DNSDEPLIBS} + if test "${LD_WRAP_TESTS}" = true -a -z "${LIBTOOL}"; then WRAP="${WRAP_OPTIONS}"; fi; \ + ${LIBTOOL_MODE_LINK} ${PURIFY} ${CC} ${CFLAGS} \ + ${LDFLAGS} $${WRAP} -Wl,-rpath=${top_builddir}/lib/ns/tests -o $@ query_test.@O@ nstest.@O@ \ + libwrap.la ${NSLIBS} ${DNSLIBS} ${ISCLIBS} ${LIBS} unit:: sh ${top_builddir}/unit/unittest.sh diff --git a/lib/ns/tests/listenlist_test.c b/lib/ns/tests/listenlist_test.c index 32466d1e0c..ca8130b181 100644 --- a/lib/ns/tests/listenlist_test.c +++ b/lib/ns/tests/listenlist_test.c @@ -119,6 +119,15 @@ main(void) { _setup, _teardown), }; + /* + * We disable this test when the address sanitizer is in + * the use, as libuv will trigger errors. 
+ */ + if (getenv("ASAN_OPTIONS") != NULL) { + printf("1..0 # Skip ASAN is in use\n"); + return (0); + } + return (cmocka_run_group_tests(tests, NULL, NULL)); } #else /* HAVE_CMOCKA */ @@ -127,7 +136,7 @@ main(void) { int main(void) { - printf("1..0 # Skipped: cmocka not available\n"); + printf("1..0 # Skip cmocka not available\n"); return (0); } diff --git a/lib/ns/tests/notify_test.c b/lib/ns/tests/notify_test.c index 8cae165209..7f3974dd67 100644 --- a/lib/ns/tests/notify_test.c +++ b/lib/ns/tests/notify_test.c @@ -136,8 +136,7 @@ notify_start(void **state) { * Clean up */ ns_test_cleanup_zone(); - - ns_client_detach(&client); + isc_nmhandle_unref(client->handle); } int @@ -147,6 +146,15 @@ main(void) { _setup, _teardown), }; + /* + * We disable this test when the address sanitizer is in + * the use, as libuv will trigger errors. + */ + if (getenv("ASAN_OPTIONS") != NULL) { + printf("1..0 # Skip ASAN is in use\n"); + return (0); + } + return (cmocka_run_group_tests(tests, NULL, NULL)); } #else /* HAVE_CMOCKA */ @@ -155,7 +163,7 @@ main(void) { int main(void) { - printf("1..0 # Skipped: cmocka not available\n"); + printf("1..0 # Skip cmocka not available\n"); return (0); } diff --git a/lib/ns/tests/nstest.c b/lib/ns/tests/nstest.c index e2ff80785e..1d0354ff19 100644 --- a/lib/ns/tests/nstest.c +++ b/lib/ns/tests/nstest.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ isc_taskmgr_t *taskmgr = NULL; isc_task_t *maintask = NULL; isc_timermgr_t *timermgr = NULL; isc_socketmgr_t *socketmgr = NULL; +isc_nm_t *nm = NULL; dns_zonemgr_t *zonemgr = NULL; dns_dispatchmgr_t *dispatchmgr = NULL; ns_clientmgr_t *clientmgr = NULL; @@ -70,6 +72,37 @@ static bool test_running = false; static dns_zone_t *served_zone = NULL; +/* + * We don't want to use netmgr-based client accounting, we need to emulate it. 
+ */ +atomic_uint_fast32_t client_refs[16]; +atomic_uintptr_t client_addrs[16]; + +void +__wrap_isc_nmhandle_unref(isc_nmhandle_t *handle); + +void +__wrap_isc_nmhandle_unref(isc_nmhandle_t *handle) { + ns_client_t *client = (ns_client_t *)handle; + int i; + + for (i = 0; i < 16; i++) { + if (atomic_load(&client_addrs[i]) == (uintptr_t) client) { + break; + } + } + REQUIRE(i < 16); + + if (atomic_fetch_sub(&client_refs[i], 1) == 1) { + dns_view_detach(&client->view); + client->state = 4; + ns__client_reset_cb(client); + ns__client_put_cb(client); + isc_mem_put(mctx, client, sizeof(ns_client_t)); + } + return; +} + /* * Logging categories: this needs to match the list in lib/ns/log.c. */ @@ -108,10 +141,6 @@ static void shutdown_managers(isc_task_t *task, isc_event_t *event) { UNUSED(task); - if (clientmgr != NULL) { - ns_clientmgr_destroy(&clientmgr); - } - if (interfacemgr != NULL) { ns_interfacemgr_shutdown(interfacemgr); ns_interfacemgr_detach(&interfacemgr); @@ -148,9 +177,22 @@ cleanup_managers(void) { if (sctx != NULL) { ns_server_detach(&sctx); } + if (interfacemgr != NULL) { + ns_interfacemgr_detach(&interfacemgr); + } if (socketmgr != NULL) { isc_socketmgr_destroy(&socketmgr); } + ns_test_nap(500000); + if (nm != NULL ){ + /* + * Force something in the workqueue as a workaround + * for libuv bug - not sending uv_close callback. 
+ */ + isc_nm_pause(nm); + isc_nm_resume(nm); + isc_nm_detach(&nm); + } if (taskmgr != NULL) { isc_taskmgr_destroy(&taskmgr); } @@ -177,7 +219,7 @@ create_managers(void) { isc_event_t *event = NULL; ncpus = isc_os_ncpus(); - CHECK(isc_taskmgr_create(mctx, ncpus, 0, &taskmgr)); + CHECK(isc_taskmgr_create(mctx, ncpus, 0, NULL, &taskmgr)); CHECK(isc_task_create(taskmgr, 0, &maintask)); isc_taskmgr_setexcltask(taskmgr, maintask); CHECK(isc_task_onshutdown(maintask, shutdown_managers, NULL)); @@ -186,17 +228,16 @@ create_managers(void) { CHECK(isc_socketmgr_create(mctx, &socketmgr)); + nm = isc_nm_start(mctx, ncpus); + CHECK(ns_server_create(mctx, matchview, &sctx)); CHECK(dns_dispatchmgr_create(mctx, &dispatchmgr)); CHECK(ns_interfacemgr_create(mctx, sctx, taskmgr, timermgr, - socketmgr, dispatchmgr, maintask, + socketmgr, nm, dispatchmgr, maintask, ncpus, NULL, &interfacemgr)); - CHECK(ns_clientmgr_create(mctx, sctx, taskmgr, timermgr, - &clientmgr)); - CHECK(ns_listenlist_default(mctx, 5300, -1, true, &listenon)); ns_interfacemgr_setlistenon4(interfacemgr, listenon); ns_listenlist_detach(&listenon); @@ -212,6 +253,8 @@ create_managers(void) { * we'll just sleep for a bit and hope. 
*/ ns_test_nap(500000); + ns_interface_t *ifp = ns__interfacemgr_getif(interfacemgr); + clientmgr = ifp->clientmgr; run_managers = true; @@ -510,16 +553,28 @@ ns_test_getclient(ns_interface_t *ifp0, bool tcp, ns_client_t **clientp) { isc_result_t result; - ns_interface_t *ifp = ifp0; + ns_client_t *client = isc_mem_get(mctx, sizeof(ns_client_t)); + int i; - if (ifp == NULL) { - ifp = ns__interfacemgr_getif(interfacemgr); - } - if (ifp == NULL) { - return (ISC_R_FAILURE); - } + UNUSED(ifp0); + UNUSED(tcp); + + result = ns__client_setup(client, clientmgr, true); + + for (i = 0; i < 16; i++) { + if (atomic_load(&client_addrs[i]) == (uintptr_t) NULL || + atomic_load(&client_addrs[i]) == (uintptr_t) client) + { + break; + } + } + REQUIRE(i < 16); + + atomic_store(&client_refs[i], 2); + atomic_store(&client_addrs[i], (uintptr_t) client); + client->handle = (isc_nmhandle_t *) client; /* Hack */ + *clientp = client; - result = ns__clientmgr_getclient(clientmgr, ifp, tcp, clientp); return (result); } @@ -765,14 +820,14 @@ ns_test_qctx_create(const ns_test_qctx_create_params_t *params, * Reference count for "client" is now at 2, so decrement it in order * for it to drop to zero when "qctx" gets destroyed. 
*/ - ns_client_detach(&client); + isc_nmhandle_unref(client->handle); return (ISC_R_SUCCESS); destroy_query: dns_message_destroy(&client->message); detach_client: - ns_client_detach(&client); + isc_nmhandle_unref(client->handle); return (result); } @@ -786,14 +841,15 @@ ns_test_qctx_destroy(query_ctx_t **qctxp) { qctx = *qctxp; - ns_client_detach(&qctx->client); - if (qctx->zone != NULL) { dns_zone_detach(&qctx->zone); } if (qctx->db != NULL) { dns_db_detach(&qctx->db); } + if (qctx->client != NULL) { + isc_nmhandle_unref(qctx->client->handle); + } isc_mem_put(mctx, qctx, sizeof(*qctx)); *qctxp = NULL; diff --git a/lib/ns/tests/query_test.c b/lib/ns/tests/query_test.c index 415de2a887..3540a3f91e 100644 --- a/lib/ns/tests/query_test.c +++ b/lib/ns/tests/query_test.c @@ -598,6 +598,15 @@ main(void) { _setup, _teardown), }; + /* + * We disable this test when the address sanitizer is in + * use, as libuv will trigger errors. + */ + if (getenv("ASAN_OPTIONS") != NULL) { + printf("1..0 # Skip ASAN is in use\n"); + return (0); + } + return (cmocka_run_group_tests(tests, NULL, NULL)); } #else /* HAVE_CMOCKA */ @@ -606,7 +615,7 @@ main(void) { int main(void) { - printf("1..0 # Skipped: cmocka not available\n"); + printf("1..0 # Skip cmocka not available\n"); return (0); } diff --git a/lib/ns/tests/wrap.c b/lib/ns/tests/wrap.c new file mode 100644 index 0000000000..41b8449fc6 --- /dev/null +++ b/lib/ns/tests/wrap.c @@ -0,0 +1,40 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/*!
\file */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +/* + * This overrides calls to isc_nmhandle_unref(), sending them to + * __wrap_isc_nmhandle_unref(), when libtool is in use and LD_WRAP + * can't be used. + */ + +extern void +__wrap_isc_nmhandle_unref(isc_nmhandle_t *handle); + +void +isc_nmhandle_unref(isc_nmhandle_t *handle) { + __wrap_isc_nmhandle_unref(handle); +} diff --git a/lib/ns/update.c b/lib/ns/update.c index 3baf428bd8..bdf55c10a9 100644 --- a/lib/ns/update.c +++ b/lib/ns/update.c @@ -1529,7 +1529,6 @@ send_update_event(ns_client_t *client, dns_zone_t *zone) { isc_result_t result = ISC_R_SUCCESS; update_event_t *event = NULL; isc_task_t *zonetask = NULL; - ns_client_t *evclient; event = (update_event_t *) isc_event_allocate(client->mctx, client, DNS_EVENT_UPDATE, @@ -1537,12 +1536,11 @@ send_update_event(ns_client_t *client, dns_zone_t *zone) { event->zone = zone; event->result = ISC_R_SUCCESS; - evclient = NULL; - ns_client_attach(client, &evclient); INSIST(client->nupdates == 0); client->nupdates++; - event->ev_arg = evclient; + event->ev_arg = client; + isc_nmhandle_ref(client->handle); dns_zone_gettask(zone, &zonetask); isc_task_send(zonetask, ISC_EVENT_PTR(&event)); @@ -1559,6 +1557,7 @@ respond(ns_client_t *client, isc_result_t result) { client->message->rcode = dns_result_torcode(result); ns_client_send(client); + isc_nmhandle_unref(client->handle); return; msg_failure: @@ -1566,7 +1565,8 @@ respond(ns_client_t *client, isc_result_t result) { ISC_LOG_ERROR, "could not create update response message: %s", isc_result_totext(msg_result)); - ns_client_next(client, msg_result); + ns_client_drop(client, msg_result); + isc_nmhandle_unref(client->handle); } void @@ -2518,7 +2518,8 @@ update_action(isc_task_t *task, isc_event_t *event) { dns_ttl_t maxttl = 0; uint32_t maxrecords; uint64_t records; - dns_aclenv_t *env = ns_interfacemgr_getaclenv(client->interface->mgr); + dns_aclenv_t 
*env = + ns_interfacemgr_getaclenv(client->manager->interface->mgr); INSIST(event->ev_type == DNS_EVENT_UPDATE); @@ -3389,7 +3390,7 @@ updatedone_action(isc_task_t *task, isc_event_t *event) { client->nupdates--; respond(client, uev->result); isc_event_free(&event); - ns_client_detach(&client); + isc_nmhandle_unref(client->handle); } /*% @@ -3406,10 +3407,9 @@ forward_fail(isc_task_t *task, isc_event_t *event) { client->nupdates--; respond(client, DNS_R_SERVFAIL); isc_event_free(&event); - ns_client_detach(&client); + isc_nmhandle_unref(client->handle); } - static void forward_callback(void *arg, isc_result_t result, dns_message_t *answer) { update_event_t *uev = arg; @@ -3443,7 +3443,7 @@ forward_done(isc_task_t *task, isc_event_t *event) { ns_client_sendraw(client, uev->answer); dns_message_destroy(&uev->answer); isc_event_free(&event); - ns_client_detach(&client); + isc_nmhandle_unref(client->handle); } static void @@ -3461,8 +3461,10 @@ forward_action(isc_task_t *task, isc_event_t *event) { isc_task_send(client->task, &event); inc_stats(client, zone, ns_statscounter_updatefwdfail); dns_zone_detach(&zone); - } else + } else { inc_stats(client, zone, ns_statscounter_updatereqfwd); + } + isc_task_detach(&task); } @@ -3473,13 +3475,6 @@ send_forward_event(ns_client_t *client, dns_zone_t *zone) { isc_result_t result = ISC_R_SUCCESS; update_event_t *event = NULL; isc_task_t *zonetask = NULL; - ns_client_t *evclient; - - /* - * This may take some time so replace this client. 
- */ - if (!client->mortal && (client->attributes & NS_CLIENTATTR_TCP) == 0) - CHECK(ns_client_replace(client)); event = (update_event_t *) isc_event_allocate(client->mctx, client, DNS_EVENT_UPDATE, @@ -3487,11 +3482,9 @@ send_forward_event(ns_client_t *client, dns_zone_t *zone) { event->zone = zone; event->result = ISC_R_SUCCESS; - evclient = NULL; - ns_client_attach(client, &evclient); INSIST(client->nupdates == 0); client->nupdates++; - event->ev_arg = evclient; + event->ev_arg = client; dns_name_format(dns_zone_getorigin(zone), namebuf, sizeof(namebuf)); @@ -3503,10 +3496,11 @@ send_forward_event(ns_client_t *client, dns_zone_t *zone) { namebuf, classbuf); dns_zone_gettask(zone, &zonetask); + isc_nmhandle_ref(client->handle); isc_task_send(zonetask, ISC_EVENT_PTR(&event)); - failure: - if (event != NULL) + if (event != NULL) { isc_event_free(ISC_EVENT_PTR(&event)); + } return (result); } diff --git a/lib/ns/win32/libns.def b/lib/ns/win32/libns.def index d221b0d5b2..2105a3e493 100644 --- a/lib/ns/win32/libns.def +++ b/lib/ns/win32/libns.def @@ -3,18 +3,19 @@ LIBRARY libns ; Exported Functions EXPORTS +ns__client_put_cb ns__client_request -ns__clientmgr_getclient +ns__client_reset_cb +ns__client_setup ns__interfacemgr_getif ns__interfacemgr_nextif ns__query_sfcache ns__query_start ns_client_aclmsg ns_client_addopt -ns_client_attach ns_client_checkacl ns_client_checkaclsilent -ns_client_detach +ns_client_drop ns_client_dumprecursing ns_client_error ns_client_findversion @@ -29,19 +30,16 @@ ns_client_newdbversion ns_client_newname ns_client_newnamebuf ns_client_newrdataset -ns_client_next ns_client_putrdataset ns_client_qnamereplace ns_client_recursing ns_client_releasename -ns_client_replace ns_client_send ns_client_sendraw ns_client_settimeout ns_client_shuttingdown ns_client_sourceip ns_clientmgr_create -ns_clientmgr_createclients ns_clientmgr_destroy ns_hook_add ns_hooktable_create diff --git a/lib/ns/win32/libns.vcxproj.filters 
b/lib/ns/win32/libns.vcxproj.filters index 499e7380f0..2931412cbf 100644 --- a/lib/ns/win32/libns.vcxproj.filters +++ b/lib/ns/win32/libns.vcxproj.filters @@ -18,12 +18,6 @@ - - Library Source Files - - - Library Source Files - Source Files @@ -63,6 +57,12 @@ Source Files + + Source Files + + + Source Files + @@ -111,4 +111,4 @@ Header Files - + \ No newline at end of file diff --git a/lib/ns/xfrout.c b/lib/ns/xfrout.c index 7672f59d9a..9c393c63ac 100644 --- a/lib/ns/xfrout.c +++ b/lib/ns/xfrout.c @@ -669,6 +669,7 @@ typedef struct { names and rdatas */ isc_buffer_t txlenbuf; /* Transmit length buffer */ isc_buffer_t txbuf; /* Transmit message buffer */ + size_t cbytes; /* Length of current message */ void *txmem; unsigned int txmemlen; dns_tsigkey_t *tsigkey; /* Key used to create TSIG */ @@ -682,24 +683,21 @@ typedef struct { struct xfr_stats stats; /*%< Transfer statistics */ } xfrout_ctx_t; -static isc_result_t +static void xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id, dns_name_t *qname, dns_rdatatype_t qtype, dns_rdataclass_t qclass, dns_zone_t *zone, dns_db_t *db, dns_dbversion_t *ver, isc_quota_t *quota, rrstream_t *stream, dns_tsigkey_t *tsigkey, - isc_buffer_t *lasttsig, - bool verified_tsig, - unsigned int maxtime, - unsigned int idletime, - bool many_answers, - xfrout_ctx_t **xfrp); + isc_buffer_t *lasttsig, bool verified_tsig, + unsigned int maxtime, unsigned int idletime, + bool many_answers, xfrout_ctx_t **xfrp); static void sendstream(xfrout_ctx_t *xfr); static void -xfrout_senddone(isc_task_t *task, isc_event_t *event); +xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg); static void xfrout_fail(xfrout_ctx_t *xfr, isc_result_t result, const char *msg); @@ -1067,29 +1065,26 @@ ns_xfr_start(ns_client_t *client, dns_rdatatype_t reqtype) { if (is_dlz) { - CHECK(xfrout_ctx_create(mctx, client, request->id, - question_name, reqtype, question_class, - zone, db, ver, quota, stream, - 
dns_message_gettsigkey(request), - tsigbuf, - request->verified_sig, - 3600, - 3600, - (format == dns_many_answers) ? - true : false, - &xfr)); + xfrout_ctx_create(mctx, client, request->id, + question_name, reqtype, question_class, + zone, db, ver, quota, stream, + dns_message_gettsigkey(request), + tsigbuf, request->verified_sig, + 3600, 3600, + (format == dns_many_answers) + ? true : false, + &xfr); } else { - CHECK(xfrout_ctx_create(mctx, client, request->id, - question_name, reqtype, question_class, - zone, db, ver, quota, stream, - dns_message_gettsigkey(request), - tsigbuf, - request->verified_sig, - dns_zone_getmaxxfrout(zone), - dns_zone_getidleout(zone), - (format == dns_many_answers) ? - true : false, - &xfr)); + xfrout_ctx_create(mctx, client, request->id, + question_name, reqtype, question_class, + zone, db, ver, quota, stream, + dns_message_gettsigkey(request), + tsigbuf, request->verified_sig, + dns_zone_getmaxxfrout(zone), + dns_zone_getidleout(zone), + (format == dns_many_answers) + ? 
true : false, + &xfr); } xfr->mnemonic = mnemonic; @@ -1189,10 +1184,11 @@ ns_xfr_start(ns_client_t *client, dns_rdatatype_t reqtype) { NS_LOGMODULE_XFER_OUT, ISC_LOG_DEBUG(3), "zone transfer setup failed"); ns_client_error(client, result); + isc_nmhandle_unref(client->handle); } } -static isc_result_t +static void xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id, dns_name_t *qname, dns_rdatatype_t qtype, dns_rdataclass_t qclass, dns_zone_t *zone, @@ -1203,16 +1199,18 @@ xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id, bool many_answers, xfrout_ctx_t **xfrp) { xfrout_ctx_t *xfr; - isc_result_t result; unsigned int len; void *mem; - INSIST(xfrp != NULL && *xfrp == NULL); + REQUIRE(xfrp != NULL && *xfrp == NULL); + + UNUSED(maxtime); + UNUSED(idletime); + xfr = isc_mem_get(mctx, sizeof(*xfr)); xfr->mctx = NULL; isc_mem_attach(mctx, &xfr->mctx); - xfr->client = NULL; - ns_client_attach(client, &xfr->client); + xfr->client = client; xfr->id = id; xfr->qname = qname; xfr->qtype = qtype; @@ -1271,8 +1269,10 @@ xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id, xfr->txmem = mem; xfr->txmemlen = len; +#if 0 CHECK(dns_timer_setidle(xfr->client->timer, maxtime, idletime, false)); +#endif /* * Register a shutdown callback with the client, so that we @@ -1289,14 +1289,8 @@ xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id, xfr->stream = stream; *xfrp = xfr; - return (ISC_R_SUCCESS); - -failure: - xfrout_ctx_destroy(&xfr); - return (result); } - /* * Arrange to send as much as we can of "stream" without blocking. 
* @@ -1310,8 +1304,6 @@ sendstream(xfrout_ctx_t *xfr) { dns_message_t *tcpmsg = NULL; dns_message_t *msg = NULL; /* Client message if UDP, tcpmsg if TCP */ isc_result_t result; - isc_region_t used; - isc_region_t region; dns_rdataset_t *qrdataset; dns_name_t *msgname = NULL; dns_rdata_t *msgrdata = NULL; @@ -1320,7 +1312,6 @@ sendstream(xfrout_ctx_t *xfr) { dns_compress_t cctx; bool cleanup_cctx = false; bool is_tcp; - int n_rrs; isc_buffer_clear(&xfr->buf); @@ -1545,6 +1536,7 @@ sendstream(xfrout_ctx_t *xfr) { } if (is_tcp) { + isc_region_t used; CHECK(dns_compress_init(&cctx, -1, xfr->mctx)); dns_compress_setsensitive(&cctx, true); cleanup_cctx = true; @@ -1556,22 +1548,20 @@ sendstream(xfrout_ctx_t *xfr) { cleanup_cctx = false; isc_buffer_usedregion(&xfr->txbuf, &used); - isc_buffer_putuint16(&xfr->txlenbuf, - (uint16_t)used.length); - region.base = xfr->txlenbuf.base; - region.length = 2 + used.length; + xfrout_log(xfr, ISC_LOG_DEBUG(8), "sending TCP message of %d bytes", used.length); - CHECK(isc_socket_send(xfr->client->tcpsocket, /* XXX */ - &region, xfr->client->task, - xfrout_senddone, - xfr)); + + CHECK(isc_nm_send(xfr->client->handle, &used, + xfrout_senddone, xfr)); xfr->sends++; + xfr->cbytes = used.length; } else { xfrout_log(xfr, ISC_LOG_DEBUG(8), "sending IXFR UDP response"); ns_client_send(xfr->client); xfr->stream->methods->pause(xfr->stream); + isc_nmhandle_unref(xfr->client->handle); xfrout_ctx_destroy(&xfr); return; } @@ -1615,7 +1605,6 @@ sendstream(xfrout_ctx_t *xfr) { static void xfrout_ctx_destroy(xfrout_ctx_t **xfrp) { xfrout_ctx_t *xfr = *xfrp; - ns_client_t *client = NULL; INSIST(xfr->sends == 0); @@ -1639,28 +1628,18 @@ xfrout_ctx_destroy(xfrout_ctx_t **xfrp) { if (xfr->db != NULL) dns_db_detach(&xfr->db); - /* - * We want to detch the client after we have released the memory - * context as ns_client_detach checks the memory reference count.
- */ - ns_client_attach(xfr->client, &client); - ns_client_detach(&xfr->client); isc_mem_putanddetach(&xfr->mctx, xfr, sizeof(*xfr)); - ns_client_detach(&client); *xfrp = NULL; } static void -xfrout_senddone(isc_task_t *task, isc_event_t *event) { - isc_socketevent_t *sev = (isc_socketevent_t *)event; - xfrout_ctx_t *xfr = (xfrout_ctx_t *)event->ev_arg; - isc_result_t evresult = sev->result; +xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg) { + xfrout_ctx_t *xfr = (xfrout_ctx_t *)arg; - UNUSED(task); + REQUIRE((xfr->client->attributes & NS_CLIENTATTR_TCP) != 0); - INSIST(event->ev_type == ISC_SOCKEVENT_SENDDONE); - INSIST((xfr->client->attributes & NS_CLIENTATTR_TCP) != 0); + INSIST(handle == xfr->client->handle); xfr->sends--; INSIST(xfr->sends == 0); @@ -1669,20 +1648,23 @@ xfrout_senddone(isc_task_t *task, isc_event_t *event) { * Update transfer statistics if sending succeeded, accounting for the * two-byte TCP length prefix included in the number of bytes sent. */ - if (evresult == ISC_R_SUCCESS) { + if (result == ISC_R_SUCCESS) { xfr->stats.nmsg++; - xfr->stats.nbytes += sev->region.length - 2; + xfr->stats.nbytes += xfr->cbytes; } - isc_event_free(&event); - +#if 0 (void)isc_timer_touch(xfr->client->timer); +#endif + if (xfr->shuttingdown == true) { xfrout_maybe_destroy(xfr); - } else if (evresult != ISC_R_SUCCESS) { - xfrout_fail(xfr, evresult, "send"); + } else if (result != ISC_R_SUCCESS) { + xfrout_fail(xfr, result, "send"); } else if (xfr->end_of_stream == false) { sendstream(xfr); + /* Return now so we don't unref the handle */ + return; } else { /* End of zone transfer stream. 
*/ uint64_t msecs, persec; @@ -1707,9 +1689,10 @@ xfrout_senddone(isc_task_t *task, isc_event_t *event) { (unsigned int) (msecs % 1000), (unsigned int) persec); - ns_client_next(xfr->client, ISC_R_SUCCESS); xfrout_ctx_destroy(&xfr); } + + isc_nmhandle_unref(handle); } static void @@ -1723,6 +1706,7 @@ xfrout_fail(xfrout_ctx_t *xfr, isc_result_t result, const char *msg) { static void xfrout_maybe_destroy(xfrout_ctx_t *xfr) { INSIST(xfr->shuttingdown == true); +#if 0 if (xfr->sends > 0) { /* * If we are currently sending, cancel it and wait for @@ -1731,9 +1715,13 @@ xfrout_maybe_destroy(xfrout_ctx_t *xfr) { isc_socket_cancel(xfr->client->tcpsocket, xfr->client->task, ISC_SOCKCANCEL_SEND); } else { - ns_client_next(xfr->client, ISC_R_CANCELED); +#endif + ns_client_drop(xfr->client, ISC_R_CANCELED); + isc_nmhandle_unref(xfr->client->handle); xfrout_ctx_destroy(&xfr); +#if 0 } +#endif } static void diff --git a/util/copyrights b/util/copyrights index 9d5aa28c46..92a4506c57 100644 --- a/util/copyrights +++ b/util/copyrights @@ -624,9 +624,8 @@ ./bin/tests/system/emptyzones/setup.sh SH 2014,2016,2018,2019 ./bin/tests/system/emptyzones/tests.sh SH 2014,2015,2016,2018,2019 ./bin/tests/system/feature-test.c C 2016,2017,2018,2019 -./bin/tests/system/fetchlimit/ans4/ans.pl PERL 2015,2016,2018,2019 +./bin/tests/system/fetchlimit/ans4/ans.pl PERL 2019 ./bin/tests/system/fetchlimit/clean.sh SH 2015,2016,2018,2019 -./bin/tests/system/fetchlimit/ns3/named.args X 2015,2018,2019 ./bin/tests/system/fetchlimit/prereq.sh SH 2018,2019 ./bin/tests/system/fetchlimit/setup.sh SH 2015,2016,2018,2019 ./bin/tests/system/fetchlimit/tests.sh SH 2015,2016,2018,2019 @@ -909,13 +908,12 @@ ./bin/tests/system/redirect/ns5/sign.sh SH 2019 ./bin/tests/system/redirect/setup.sh SH 2011,2012,2013,2014,2015,2016,2017,2018,2019 ./bin/tests/system/redirect/tests.sh SH 2011,2012,2013,2014,2015,2016,2018,2019 -./bin/tests/system/resolver/ans2/ans.pl PERL 2000,2001,2004,2007,2009,2010,2012,2016,2018,2019 
+./bin/tests/system/resolver/ans2/ans.pl PERL 2019 ./bin/tests/system/resolver/ans3/ans.pl PERL 2000,2001,2004,2007,2009,2012,2016,2018,2019 ./bin/tests/system/resolver/ans8/ans.pl PERL 2017,2018,2019 ./bin/tests/system/resolver/clean.sh SH 2008,2009,2010,2011,2012,2013,2014,2015,2016,2018,2019 ./bin/tests/system/resolver/ns4/named.noaa TXT.BRIEF 2010,2016,2018,2019 ./bin/tests/system/resolver/ns6/keygen.sh SH 2010,2012,2014,2016,2017,2018,2019 -./bin/tests/system/resolver/ns7/named.args X 2011,2012,2014,2018,2019 ./bin/tests/system/resolver/prereq.sh SH 2000,2001,2004,2007,2012,2014,2016,2018,2019 ./bin/tests/system/resolver/setup.sh SH 2010,2011,2012,2013,2014,2016,2017,2018,2019 ./bin/tests/system/resolver/tests.sh SH 2000,2001,2004,2007,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 @@ -2129,6 +2127,7 @@ ./lib/isc/api X 1999,2000,2001,2006,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 ./lib/isc/app.c C 1999,2000,2001,2002,2003,2004,2005,2007,2008,2009,2013,2014,2015,2016,2017,2018,2019 ./lib/isc/assertions.c C 1997,1998,1999,2000,2001,2004,2005,2007,2008,2009,2015,2016,2018,2019 +./lib/isc/astack.c C 2019 ./lib/isc/backtrace-emptytbl.c C 2009,2016,2018,2019 ./lib/isc/backtrace.c C 2009,2013,2014,2015,2016,2018,2019 ./lib/isc/base32.c C 2008,2009,2013,2014,2015,2016,2018,2019 @@ -2148,6 +2147,7 @@ ./lib/isc/heap.c C 1997,1998,1999,2000,2001,2004,2005,2006,2007,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 ./lib/isc/hex.c C 2000,2001,2002,2003,2004,2005,2007,2008,2013,2014,2015,2016,2018,2019 ./lib/isc/hmac.c C 2000,2001,2004,2005,2006,2007,2009,2011,2012,2013,2014,2015,2016,2017,2018,2019 +./lib/isc/hp.c C 2019 ./lib/isc/ht.c C 2016,2017,2018,2019 ./lib/isc/httpd.c C 2006,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 ./lib/isc/include/Makefile.in MAKE 1998,1999,2000,2001,2004,2007,2012,2014,2016,2018,2019 @@ -2155,6 +2155,7 @@ ./lib/isc/include/isc/aes.h C 2014,2016,2018,2019 ./lib/isc/include/isc/app.h C 
1999,2000,2001,2004,2005,2006,2007,2009,2013,2014,2015,2016,2018,2019 ./lib/isc/include/isc/assertions.h C 1997,1998,1999,2000,2001,2004,2005,2006,2007,2008,2009,2016,2017,2018,2019 +./lib/isc/include/isc/astack.h C 2019 ./lib/isc/include/isc/atomic.h C 2018,2019 ./lib/isc/include/isc/backtrace.h C 2009,2016,2018,2019 ./lib/isc/include/isc/base32.h C 2008,2014,2016,2018,2019 @@ -2179,6 +2180,7 @@ ./lib/isc/include/isc/heap.h C 1997,1998,1999,2000,2001,2004,2005,2006,2007,2009,2012,2016,2018,2019 ./lib/isc/include/isc/hex.h C 2000,2001,2004,2005,2006,2007,2008,2016,2018,2019 ./lib/isc/include/isc/hmac.h C 2000,2001,2004,2005,2006,2007,2009,2011,2012,2013,2014,2015,2016,2017,2018,2019 +./lib/isc/include/isc/hp.h C 2019 ./lib/isc/include/isc/ht.h C 2016,2017,2018,2019 ./lib/isc/include/isc/httpd.h C 2006,2007,2008,2014,2016,2018,2019 ./lib/isc/include/isc/interfaceiter.h C 1999,2000,2001,2004,2005,2006,2007,2016,2018,2019 @@ -2197,6 +2199,7 @@ ./lib/isc/include/isc/mutexatomic.h C 2019 ./lib/isc/include/isc/mutexblock.h C 1999,2000,2001,2004,2005,2006,2007,2016,2018,2019 ./lib/isc/include/isc/netaddr.h C 1998,1999,2000,2001,2002,2004,2005,2006,2007,2009,2015,2016,2017,2018,2019 +./lib/isc/include/isc/netmgr.h C 2019 ./lib/isc/include/isc/netscope.h C 2002,2004,2005,2006,2007,2009,2016,2018,2019 ./lib/isc/include/isc/nonce.h C 2018,2019 ./lib/isc/include/isc/os.h C 2000,2001,2004,2005,2006,2007,2016,2018,2019 @@ -2251,6 +2254,12 @@ ./lib/isc/mem_p.h C 2018,2019 ./lib/isc/mutexblock.c C 1999,2000,2001,2004,2005,2007,2011,2012,2016,2018,2019 ./lib/isc/netaddr.c C 1999,2000,2001,2002,2004,2005,2007,2010,2011,2012,2014,2015,2016,2017,2018,2019 +./lib/isc/netmgr/netmgr-int.h C 2019 +./lib/isc/netmgr/netmgr.c C 2019 +./lib/isc/netmgr/tcp.c C 2019 +./lib/isc/netmgr/tcpdns.c C 2019 +./lib/isc/netmgr/udp.c C 2019 +./lib/isc/netmgr/uverr2result.c C 2019 ./lib/isc/netscope.c C 2002,2004,2005,2006,2007,2016,2018,2019 ./lib/isc/nonce.c C 2018,2019 ./lib/isc/openssl_shim.c C 
2018,2019 @@ -2268,6 +2277,7 @@ ./lib/isc/pthreads/include/isc/thread.h C 1998,1999,2000,2001,2004,2005,2007,2013,2016,2017,2018,2019 ./lib/isc/pthreads/mutex.c C 2000,2001,2002,2004,2005,2007,2008,2011,2012,2014,2015,2016,2018,2019 ./lib/isc/pthreads/thread.c C 2000,2001,2003,2004,2005,2007,2013,2016,2017,2018,2019 +./lib/isc/queue.c C 2019 ./lib/isc/quota.c C 2000,2001,2004,2005,2007,2016,2018,2019 ./lib/isc/radix.c C 2007,2008,2009,2011,2012,2013,2014,2015,2016,2018,2019 ./lib/isc/random.c C 1999,2000,2001,2002,2003,2004,2005,2007,2009,2013,2014,2016,2017,2018,2019 @@ -2304,7 +2314,6 @@ ./lib/isc/tests/netaddr_test.c C 2016,2018,2019 ./lib/isc/tests/parse_test.c C 2012,2013,2016,2018,2019 ./lib/isc/tests/pool_test.c C 2013,2016,2018,2019 -./lib/isc/tests/queue_test.c C 2011,2012,2016,2018,2019 ./lib/isc/tests/radix_test.c C 2014,2016,2018,2019 ./lib/isc/tests/random_test.c C 2014,2015,2016,2017,2018,2019 ./lib/isc/tests/regex_test.c C 2013,2015,2016,2018,2019 @@ -2329,6 +2338,7 @@ ./lib/isc/unix/file.c C 2000,2001,2002,2004,2005,2007,2009,2011,2012,2013,2014,2015,2016,2017,2018,2019 ./lib/isc/unix/fsaccess.c C 2000,2001,2004,2005,2006,2007,2016,2018,2019 ./lib/isc/unix/ifiter_getifaddrs.c C 2003,2004,2005,2007,2008,2009,2014,2016,2018,2019 +./lib/isc/unix/include/isc/align.h C 2019 ./lib/isc/unix/include/isc/dir.h C 1999,2000,2001,2004,2005,2007,2016,2018,2019 ./lib/isc/unix/include/isc/net.h C 1999,2000,2001,2002,2003,2004,2005,2007,2008,2012,2013,2014,2016,2017,2018,2019 ./lib/isc/unix/include/isc/netdb.h C 1999,2000,2001,2004,2005,2007,2016,2018,2019 @@ -2361,6 +2371,7 @@ ./lib/isc/win32/fsaccess.c C 2000,2001,2002,2004,2007,2013,2016,2017,2018,2019 ./lib/isc/win32/include/Makefile.in MAKE 1999,2000,2001,2004,2007,2012,2014,2016,2018,2019 ./lib/isc/win32/include/isc/Makefile.in MAKE 1999,2000,2001,2004,2007,2012,2013,2014,2015,2016,2018,2019 +./lib/isc/win32/include/isc/align.h C 2019 ./lib/isc/win32/include/isc/bind_registry.h C 2001,2004,2007,2016,2018,2019 
./lib/isc/win32/include/isc/bindevt.h C 2001,2004,2007,2016,2018,2019 ./lib/isc/win32/include/isc/condition.h C 1998,1999,2000,2001,2004,2007,2016,2018,2019 @@ -2501,6 +2512,7 @@ ./lib/ns/tests/plugin_test.c C 2019 ./lib/ns/tests/query_test.c C 2017,2018,2019 ./lib/ns/tests/testdata/notify/notify1.msg X 2017,2018,2019 +./lib/ns/tests/wrap.c C 2019 ./lib/ns/update.c C 2017,2018,2019 ./lib/ns/version.c C 2017,2018,2019 ./lib/ns/win32/DLLMain.c C 2017,2018,2019 diff --git a/win32utils/Configure b/win32utils/Configure index cd97fd760e..df52ef0fc9 100644 --- a/win32utils/Configure +++ b/win32utils/Configure @@ -253,6 +253,7 @@ my @substinc = ("GSSAPI_INC", "GEOIP_INC", "IDN_INC", "LIBXML2_INC", + "LIBUV_INC", "OPENSSL_INC", "READLINE_INC", "ZLIB_INC"); @@ -266,6 +267,7 @@ my @substlib = ("GSSAPI_LIB", "IDN_LIB", "KRB5_LIB", "LIBXML2_LIB", + "LIBUV_LIB", "OPENSSL_LIB", "READLINE_LIB", "READLINE_LIBD", @@ -282,6 +284,7 @@ my @substdll = ("COMERR_DLL", "KRB5_DLL", "K5SPRT_DLL", "LIBXML2_DLL", + "LIBUV_DLL", "OPENSSL_DLL", "WSHELP_DLL", "ZLIB_DLL"); @@ -379,6 +382,7 @@ my @withlist = ("aes", "system-tests", "tests", "tuning", + "libuv", "vcredist", "zlib"); @@ -418,6 +422,7 @@ my @help = ( " with-system-tests build with system test suite\n", " with-samples build with sample programs\n", " with-openssl[=PATH] build with OpenSSL yes|path (mandatory)\n", +" with-libuv[=PATH] build with libuv yes|path (mandatory)\n", " with-pkcs11[=PATH] build with PKCS#11 support yes|no|provider-path\n", " with-eddsa crypto EDDSA yes|all|no\n", " with-gssapi[=PATH] build with MIT KfW GSSAPI yes|no|path\n", @@ -461,6 +466,8 @@ my $use_tests = "no"; my $use_xtests = "no"; my $use_stests = "no"; my $use_samples = "no"; +my $use_libuv = "auto"; +my $libuv_path = "..\\..\\"; my $use_openssl = "auto"; my $openssl_path = "..\\..\\"; my $use_pkcs11 = "no"; @@ -736,6 +743,13 @@ sub mywith { $use_openssl = "yes"; $openssl_path = $val; } + } elsif ($key =~ /^libuv$/i) { + if ($val =~ /^no$/i) { + die 
"libuv is required\n"; + } elsif ($val !~ /^yes$/i) { + $use_libuv = "yes"; + $libuv_path = $val; + } } elsif ($key =~ /^pkcs11$/i) { if ($val =~ /^yes$/i) { $use_pkcs11 = "yes"; @@ -937,6 +951,7 @@ if ($verbose) { } else { print "querytrace: disabled\n"; } + print "libuv-path: $libuv_path\n"; print "openssl-path: $openssl_path\n"; if ($use_tests eq "yes") { print "tests: enabled\n"; @@ -1280,6 +1295,65 @@ if ($use_samples eq "yes") { $configcond{"SAMPLES"} = 1; } +# with-libuv +if ($use_libuv eq "auto") { + if ($verbose) { + print "checking for an libuv built directory at sibling root\n"; + } + opendir DIR, $libuv_path or die "No Directory: $!\n"; + my @dirlist = grep (/^libuv-v[0-9]+\.[0-9]+\.[0-9]+(-rc[0-9]+){0,1}$/i, + readdir(DIR)); + closedir(DIR); + + # Make sure we have something + if (scalar(@dirlist) == 0) { + die "can't find an libuv at sibling root\n"; + } + # Now see if we have a directory or just a file. + # Make sure we are case insensitive + my $file; + foreach $file (sort {uc($b) cmp uc($a)} @dirlist) { + if (-f File::Spec->catfile($libuv_path, + $file, + "include\\uv.h")) { + $libuv_path = File::Spec->catdir($libuv_path, $file); + $use_libuv = "yes"; + last; + } + } + + # If we have one use it otherwise report the error + if ($use_libuv eq "auto") { + die "can't find an libuv built directory at sibling root\n"; + } +} +# falls into (so no else) +if ($use_libuv eq "yes") { + $libuv_path = File::Spec->rel2abs($libuv_path); + if ($verbose) { + print "checking for libuv built directory at \"$libuv_path\"\n"; + } + my $libuv_new = 0; + if (!-f File::Spec->catfile($libuv_path, + "include\\uv.h")) { + die "can't find libuv uv.h include\n"; + } + my $libuv_inc = File::Spec->catdir($libuv_path, "include"); + my $libuv_libdir = File::Spec->catdir($libuv_path, "Release"); + my $libuv_lib = File::Spec->catfile($libuv_libdir, "libuv.lib"); + my $libuv_dll = File::Spec->catfile($libuv_libdir, "libuv.dll"); + if (!-f $libuv_lib) { + die "can't find libuv.lib
library\n"; + } + if (!-f $libuv_dll) { + die "can't find libuv.dll library\n"; + } + $configvar{"LIBUV_PATH"} = "$libuv_path"; + $configinc{"LIBUV_INC"} = "$libuv_inc"; + $configlib{"LIBUV_LIB"} = "$libuv_lib"; + $configdll{"LIBUV_DLL"} = "$libuv_dll"; +} + # with-openssl if ($use_openssl eq "auto") { if ($verbose) {