diff --git a/CHANGES b/CHANGES index 0db8e249c9..fad866c7b3 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +5640. [func] Add new configuration option to set the operating system + receive and send buffers. [GL #2313] + 5639. [bug] Check that the first and last SOA record of an AXFR are consistent. [GL #2528] diff --git a/bin/named/config.c b/bin/named/config.c index 3e23ed0a7b..6f56fbadbc 100644 --- a/bin/named/config.c +++ b/bin/named/config.c @@ -123,6 +123,8 @@ options {\n\ tcp-initial-timeout 300;\n\ tcp-keepalive-timeout 300;\n\ tcp-listen-queue 10;\n\ + tcp-receive-buffer 0;\n\ + tcp-send-buffer 0;\n\ # tkey-dhkey \n\ # tkey-domain \n\ # tkey-gssapi-credential \n\ @@ -133,6 +135,8 @@ options {\n\ # treat-cr-as-space ;\n\ trust-anchor-telemetry yes;\n\ # use-id-pool ;\n\ + udp-receive-buffer 0;\n\ + udp-send-buffer 0;\n\ \n\ /* view */\n\ allow-new-zones no;\n\ diff --git a/bin/named/named.conf.rst b/bin/named/named.conf.rst index bb4003ac1c..39407490f6 100644 --- a/bin/named/named.conf.rst +++ b/bin/named/named.conf.rst @@ -432,6 +432,8 @@ OPTIONS tcp-initial-timeout integer; tcp-keepalive-timeout integer; tcp-listen-queue integer; + tcp-receive-buffer integer; + tcp-send-buffer integer; tkey-dhkey quoted_string integer; tkey-domain quoted_string; tkey-gssapi-credential quoted_string; @@ -448,6 +450,8 @@ OPTIONS transfers-per-ns integer; trust-anchor-telemetry boolean; // experimental try-tcp-refresh boolean; + udp-receive-buffer integer; + udp-send-buffer integer; update-check-ksk boolean; use-alt-transfer-source boolean; use-v4-udp-ports { portrange; ... 
}; diff --git a/bin/named/server.c b/bin/named/server.c index f8d65e3b32..1bd3bb505a 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -8503,6 +8503,10 @@ load_configuration(const char *filename, named_server_t *server, uint32_t reserved; uint32_t udpsize; uint32_t transfer_message_size; + uint32_t recv_tcp_buffer_size; + uint32_t send_tcp_buffer_size; + uint32_t recv_udp_buffer_size; + uint32_t send_udp_buffer_size; named_cache_t *nsc; named_cachelist_t cachelist, tmpcachelist; ns_altsecret_t *altsecret; @@ -8774,6 +8778,9 @@ load_configuration(const char *filename, named_server_t *server, named_g_aclconfctx), "configuring statistics server(s)"); + /* + * Configure the network manager + */ obj = NULL; result = named_config_get(maps, "tcp-initial-timeout", &obj); INSIST(result == ISC_R_SUCCESS); @@ -8843,6 +8850,44 @@ load_configuration(const char *filename, named_server_t *server, isc_nm_settimeouts(named_g_netmgr, initial, idle, keepalive, advertised); +#define CAP_IF_NOT_ZERO(v, min, max) \ + if (v > 0 && v < min) { \ + v = min; \ + } else if (v > max) { \ + v = max; \ + } + + /* Set the kernel send and receive buffer sizes (0 is kept as is, other values are clamped to 4096..INT32_MAX) */ + obj = NULL; + result = named_config_get(maps, "tcp-receive-buffer", &obj); + INSIST(result == ISC_R_SUCCESS); + recv_tcp_buffer_size = cfg_obj_asuint32(obj); + CAP_IF_NOT_ZERO(recv_tcp_buffer_size, 4096, INT32_MAX); + + obj = NULL; + result = named_config_get(maps, "tcp-send-buffer", &obj); + INSIST(result == ISC_R_SUCCESS); + send_tcp_buffer_size = cfg_obj_asuint32(obj); + CAP_IF_NOT_ZERO(send_tcp_buffer_size, 4096, INT32_MAX); + + obj = NULL; + result = named_config_get(maps, "udp-receive-buffer", &obj); + INSIST(result == ISC_R_SUCCESS); + recv_udp_buffer_size = cfg_obj_asuint32(obj); + CAP_IF_NOT_ZERO(recv_udp_buffer_size, 4096, INT32_MAX); + + obj = NULL; + result = named_config_get(maps, "udp-send-buffer", &obj); + INSIST(result == ISC_R_SUCCESS); + send_udp_buffer_size = 
cfg_obj_asuint32(obj); + CAP_IF_NOT_ZERO(send_udp_buffer_size, 4096, INT32_MAX); + + isc_nm_setnetbuffers(named_g_netmgr, recv_tcp_buffer_size, + send_tcp_buffer_size, recv_udp_buffer_size, + send_udp_buffer_size); + +#undef CAP_IF_NOT_ZERO + /* * Configure sets of UDP query source ports. */ diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst index 68748252e2..073ec17cd7 100644 --- a/doc/arm/reference.rst +++ b/doc/arm/reference.rst @@ -3585,6 +3585,24 @@ Tuning milliseconds to prefer IPv6 name servers. The default is ``50`` milliseconds. +``tcp-receive-buffer``; ``udp-receive-buffer`` + These options control the operating system receiving network buffer sizes for + TCP and UDP, respectively. Buffering on the operating system level can + prevent packet drops during short spikes, but if the value is set too large + it could clog up a running server with outstanding queries that have already + timed out. The default is ``0``, which means to use the operating system + default value. The operating system caps the maximum value that the user can + set here. + +``tcp-send-buffer``; ``udp-send-buffer`` + These options control the operating system sending network buffer sizes for + TCP and UDP, respectively. Buffering on the operating system level can + prevent packet drops during short spikes, but if the value is set too large + it could clog up a running server with outstanding queries that have already + timed out. The default is ``0``, which means to use the operating system + default value. The operating system caps the maximum value that the user can + set here. + .. 
_builtin: Built-in Server Information Zones diff --git a/doc/man/named.conf.5in b/doc/man/named.conf.5in index 35872a5ad1..5a311c30db 100644 --- a/doc/man/named.conf.5in +++ b/doc/man/named.conf.5in @@ -499,6 +499,8 @@ options { tcp\-initial\-timeout integer; tcp\-keepalive\-timeout integer; tcp\-listen\-queue integer; + tcp\-receive\-buffer integer; + tcp\-send\-buffer integer; tkey\-dhkey quoted_string integer; tkey\-domain quoted_string; tkey\-gssapi\-credential quoted_string; @@ -515,6 +517,8 @@ options { transfers\-per\-ns integer; trust\-anchor\-telemetry boolean; // experimental try\-tcp\-refresh boolean; + udp\-receive\-buffer integer; + udp\-send\-buffer integer; update\-check\-ksk boolean; use\-alt\-transfer\-source boolean; use\-v4\-udp\-ports { portrange; ... }; diff --git a/doc/misc/options b/doc/misc/options index 6a035d686e..e42b004c9e 100644 --- a/doc/misc/options +++ b/doc/misc/options @@ -358,6 +358,8 @@ options { tcp-initial-timeout ; tcp-keepalive-timeout ; tcp-listen-queue ; + tcp-receive-buffer ; + tcp-send-buffer ; tkey-dhkey ; tkey-domain ; tkey-gssapi-credential ; @@ -374,6 +376,8 @@ options { transfers-per-ns ; trust-anchor-telemetry ; // experimental try-tcp-refresh ; + udp-receive-buffer ; + udp-send-buffer ; update-check-ksk ; use-alt-transfer-source ; use-v4-udp-ports { ; ... }; diff --git a/doc/misc/options.active b/doc/misc/options.active index c8c56ea427..d5adf85a98 100644 --- a/doc/misc/options.active +++ b/doc/misc/options.active @@ -355,6 +355,8 @@ options { tcp-initial-timeout ; tcp-keepalive-timeout ; tcp-listen-queue ; + tcp-receive-buffer ; + tcp-send-buffer ; tkey-dhkey ; tkey-domain ; tkey-gssapi-credential ; @@ -371,6 +373,8 @@ options { transfers-per-ns ; trust-anchor-telemetry ; // experimental try-tcp-refresh ; + udp-receive-buffer ; + udp-send-buffer ; update-check-ksk ; use-alt-transfer-source ; use-v4-udp-ports { ; ... 
}; diff --git a/doc/misc/options.grammar.rst b/doc/misc/options.grammar.rst index 5a4c4290f6..9ee853c6a9 100644 --- a/doc/misc/options.grammar.rst +++ b/doc/misc/options.grammar.rst @@ -277,6 +277,8 @@ tcp-initial-timeout ; tcp-keepalive-timeout ; tcp-listen-queue ; + tcp-receive-buffer ; + tcp-send-buffer ; tkey-dhkey ; tkey-domain ; tkey-gssapi-credential ; @@ -293,6 +295,8 @@ transfers-per-ns ; trust-anchor-telemetry ; // experimental try-tcp-refresh ; + udp-receive-buffer ; + udp-send-buffer ; update-check-ksk ; use-alt-transfer-source ; use-v4-udp-ports { ; ... }; diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index 000395b127..c4da0ed151 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -24,7 +24,14 @@ Known Issues New Features ~~~~~~~~~~~~ -- None. +- New configuration options, ``tcp-receive-buffer``, ``tcp-send-buffer``, + ``udp-receive-buffer``, and ``udp-send-buffer``, have been added. These + options allow the operator to fine-tune the receiving and sending + buffers in the operating system. On busy servers, increasing the value + of the receive buffers can prevent the server from dropping packets + during short spikes, and decreasing the value can prevent the server from + becoming clogged up with queries that are too old and have already timed out + on the receiving side. :gl:`#2313` Removed Features ~~~~~~~~~~~~~~~~ diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 7ce897786b..89ef001a71 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -424,6 +424,17 @@ isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, * \li 'mgr' is a valid netmgr. */ +void +isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp, + int32_t recv_udp, int32_t send_udp); +/*%< + * If not 0, sets the SO_RCVBUF and SO_SNDBUF socket options for TCP and UDP + * respectively. + * + * Requires: + * \li 'mgr' is a valid netmgr. 
+ */ + void isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, uint32_t *keepalive, uint32_t *advertised); diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index f45116ae41..f131400152 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -709,6 +709,14 @@ struct isc_nm { isc_barrier_t pausing; isc_barrier_t resuming; + /* + * Socket SO_RCVBUF and SO_SNDBUF values (only applied when > 0) + */ + atomic_int_fast32_t recv_udp_buffer_size; + atomic_int_fast32_t send_udp_buffer_size; + atomic_int_fast32_t recv_tcp_buffer_size; + atomic_int_fast32_t send_tcp_buffer_size; + #ifdef NETMGR_TRACE ISC_LIST(isc_nmsocket_t) active_sockets; #endif @@ -1769,6 +1777,12 @@ isc__nm_socket_tcp_nodelay(uv_os_sock_t fd); * Disables Nagle's algorithm on a TCP socket (sets TCP_NODELAY). */ +void +isc__nm_set_network_buffers(isc_nm_t *nm, uv_handle_t *handle); +/*%< + * Sets the pre-configured (greater than 0) receive and send buffer sizes on a TCP or UDP handle. + */ + /* * typedef all the netievent types */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index d61c8c87f7..ac2f38992a 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -615,6 +615,17 @@ isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, atomic_store(&mgr->advertised, advertised); } +void +isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp, + int32_t recv_udp, int32_t send_udp) { + REQUIRE(VALID_NM(mgr)); + + atomic_store(&mgr->recv_tcp_buffer_size, recv_tcp); + atomic_store(&mgr->send_tcp_buffer_size, send_tcp); + atomic_store(&mgr->recv_udp_buffer_size, recv_udp); + atomic_store(&mgr->send_udp_buffer_size, send_udp); +} + void isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, uint32_t *keepalive, uint32_t *advertised) { @@ -3141,6 +3152,40 @@ isc__nm_socket_tcp_nodelay(uv_os_sock_t fd) { #endif } +void +isc__nm_set_network_buffers(isc_nm_t *nm, uv_handle_t *handle) { + int32_t recv_buffer_size = 0; + int32_t send_buffer_size = 0; + 
+ switch (handle->type) { + case UV_TCP: + recv_buffer_size = + atomic_load_relaxed(&nm->recv_tcp_buffer_size); + send_buffer_size = + atomic_load_relaxed(&nm->send_tcp_buffer_size); + break; + case UV_UDP: + recv_buffer_size = + atomic_load_relaxed(&nm->recv_udp_buffer_size); + send_buffer_size = + atomic_load_relaxed(&nm->send_udp_buffer_size); + break; + default: + INSIST(0); + ISC_UNREACHABLE(); + } + + if (recv_buffer_size > 0) { + int r = uv_recv_buffer_size(handle, &recv_buffer_size); + INSIST(r == 0); + } + + if (send_buffer_size > 0) { + int r = uv_send_buffer_size(handle, &send_buffer_size); + INSIST(r == 0); + } +} + #ifdef NETMGR_TRACE /* * Dump all active sockets in netmgr. We output to stderr diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index c7030ecccc..7ea14d29f3 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -158,6 +158,8 @@ tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { } } + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + uv_handle_set_data(&req->uv_req.handle, req); r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, &req->peer.type.sa, tcp_connect_cb); @@ -571,6 +573,8 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { } #endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + /* * The callback will run in the same thread uv_listen() was called * from, so a race with tcp_connection_cb() isn't possible. 
diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index 0d9bd85f99..fe0cfccf04 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -132,6 +132,8 @@ tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { } } + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + uv_handle_set_data(&req->uv_req.handle, req); r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, &req->peer.type.sa, tcpdns_connect_cb); @@ -540,6 +542,8 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { } #endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + /* * The callback will run in the same thread uv_listen() was called * from, so a race with tcpdns_connection_cb() isn't possible. diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c index 212d81539c..afe6a31043 100644 --- a/lib/isc/netmgr/tlsdns.c +++ b/lib/isc/netmgr/tlsdns.c @@ -149,6 +149,8 @@ tlsdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { } } + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + uv_handle_set_data(&req->uv_req.handle, req); r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, &req->peer.type.sa, tlsdns_connect_cb); @@ -610,6 +612,8 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { } #endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + /* * The callback will run in the same thread uv_listen() was * called from, so a race with tlsdns_connection_cb() isn't diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index b8a40d99f5..2c944b7d82 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -274,14 +274,8 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { } #endif -#ifdef ISC_RECV_BUFFER_SIZE - uv_recv_buffer_size(&sock->uv_handle.handle, - &(int){ ISC_RECV_BUFFER_SIZE }); -#endif -#ifdef ISC_SEND_BUFFER_SIZE - uv_send_buffer_size(&sock->uv_handle.handle, - &(int){ 
ISC_SEND_BUFFER_SIZE }); -#endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + r = uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb, udp_recv_cb); if (r != 0) { @@ -647,14 +641,7 @@ udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { goto done; } -#ifdef ISC_RECV_BUFFER_SIZE - uv_recv_buffer_size(&sock->uv_handle.handle, - &(int){ ISC_RECV_BUFFER_SIZE }); -#endif -#ifdef ISC_SEND_BUFFER_SIZE - uv_send_buffer_size(&sock->uv_handle.handle, - &(int){ ISC_SEND_BUFFER_SIZE }); -#endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); /* * On FreeBSD the UDP connect() call sometimes results in a diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in index db582009ad..d384592646 100644 --- a/lib/isc/win32/libisc.def.in +++ b/lib/isc/win32/libisc.def.in @@ -471,6 +471,7 @@ isc_nm_stoplistening isc_nm_tcpconnect isc_nm_tcpdnsconnect isc_nm_gettimeouts +isc_nm_setnetbuffers isc_nm_settimeouts isc_nm_tcpdns_keepalive isc_nm_tcpdns_sequential diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index 59c2e57e9f..cf2b67deb7 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -1268,6 +1268,8 @@ static cfg_clausedef_t options_clauses[] = { { "tcp-initial-timeout", &cfg_type_uint32, 0 }, { "tcp-keepalive-timeout", &cfg_type_uint32, 0 }, { "tcp-listen-queue", &cfg_type_uint32, 0 }, + { "tcp-receive-buffer", &cfg_type_uint32, 0 }, + { "tcp-send-buffer", &cfg_type_uint32, 0 }, { "tkey-dhkey", &cfg_type_tkey_dhkey, 0 }, { "tkey-domain", &cfg_type_qstring, 0 }, { "tkey-gssapi-credential", &cfg_type_qstring, 0 }, @@ -1277,6 +1279,8 @@ static cfg_clausedef_t options_clauses[] = { { "transfers-out", &cfg_type_uint32, 0 }, { "transfers-per-ns", &cfg_type_uint32, 0 }, { "treat-cr-as-space", NULL, CFG_CLAUSEFLAG_ANCIENT }, + { "udp-receive-buffer", &cfg_type_uint32, 0 }, + { "udp-send-buffer", &cfg_type_uint32, 0 }, { "use-id-pool", NULL, CFG_CLAUSEFLAG_ANCIENT }, { "use-ixfr", NULL, 
CFG_CLAUSEFLAG_ANCIENT }, { "use-v4-udp-ports", &cfg_type_bracketed_portlist, 0 },