From 45090894194361574eafd366df44dffa291a3493 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Wed, 2 Dec 2020 20:51:38 +0100 Subject: [PATCH 1/2] Add configuration option to set send/recv buffers on the nm sockets This commit adds a new configuration option to set the receive and send buffer sizes on the TCP and UDP netmgr sockets. The default is `0` which doesn't set any value and just uses the value set by the operating system. There's no magic value here - set it too small and the performance will drop, set it too large, the buffers can fill up with queries that have already timed out on the client side and nobody is interested in the answer and this would just make the server clog up even more by making it produce useless work. The `netstat -su` can be used on POSIX systems to monitor the receive and send buffer errors. --- bin/named/config.c | 4 ++++ bin/named/named.conf.rst | 4 ++++ bin/named/server.c | 45 ++++++++++++++++++++++++++++++++++++ doc/arm/reference.rst | 18 +++++++++++++++ doc/man/named.conf.5in | 4 ++++ doc/misc/options | 4 ++++ doc/misc/options.active | 4 ++++ doc/misc/options.grammar.rst | 4 ++++ lib/isc/include/isc/netmgr.h | 11 +++++++++ lib/isc/netmgr/netmgr-int.h | 14 +++++++++++ lib/isc/netmgr/netmgr.c | 45 ++++++++++++++++++++++++++++++++++++ lib/isc/netmgr/tcp.c | 4 ++++ lib/isc/netmgr/tcpdns.c | 4 ++++ lib/isc/netmgr/tlsdns.c | 4 ++++ lib/isc/netmgr/udp.c | 19 +++------------ lib/isc/win32/libisc.def.in | 1 + lib/isccfg/namedconf.c | 4 ++++ 17 files changed, 177 insertions(+), 16 deletions(-) diff --git a/bin/named/config.c b/bin/named/config.c index 3e23ed0a7b..6f56fbadbc 100644 --- a/bin/named/config.c +++ b/bin/named/config.c @@ -123,6 +123,8 @@ options {\n\ tcp-initial-timeout 300;\n\ tcp-keepalive-timeout 300;\n\ tcp-listen-queue 10;\n\ + tcp-receive-buffer 0;\n\ + tcp-send-buffer 0;\n\ # tkey-dhkey \n\ # tkey-domain \n\ # tkey-gssapi-credential \n\ @@ -133,6 +135,8 @@ options {\n\ # treat-cr-as-space ;\n\ 
trust-anchor-telemetry yes;\n\ # use-id-pool ;\n\ + udp-receive-buffer 0;\n\ + udp-send-buffer 0;\n\ \n\ /* view */\n\ allow-new-zones no;\n\ diff --git a/bin/named/named.conf.rst b/bin/named/named.conf.rst index bb4003ac1c..39407490f6 100644 --- a/bin/named/named.conf.rst +++ b/bin/named/named.conf.rst @@ -432,6 +432,8 @@ OPTIONS tcp-initial-timeout integer; tcp-keepalive-timeout integer; tcp-listen-queue integer; + tcp-receive-buffer integer; + tcp-send-buffer integer; tkey-dhkey quoted_string integer; tkey-domain quoted_string; tkey-gssapi-credential quoted_string; @@ -448,6 +450,8 @@ OPTIONS transfers-per-ns integer; trust-anchor-telemetry boolean; // experimental try-tcp-refresh boolean; + udp-receive-buffer integer; + udp-send-buffer integer; update-check-ksk boolean; use-alt-transfer-source boolean; use-v4-udp-ports { portrange; ... }; diff --git a/bin/named/server.c b/bin/named/server.c index f8d65e3b32..1bd3bb505a 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -8503,6 +8503,10 @@ load_configuration(const char *filename, named_server_t *server, uint32_t reserved; uint32_t udpsize; uint32_t transfer_message_size; + uint32_t recv_tcp_buffer_size; + uint32_t send_tcp_buffer_size; + uint32_t recv_udp_buffer_size; + uint32_t send_udp_buffer_size; named_cache_t *nsc; named_cachelist_t cachelist, tmpcachelist; ns_altsecret_t *altsecret; @@ -8774,6 +8778,9 @@ load_configuration(const char *filename, named_server_t *server, named_g_aclconfctx), "configuring statistics server(s)"); + /* + * Configure the network manager + */ obj = NULL; result = named_config_get(maps, "tcp-initial-timeout", &obj); INSIST(result == ISC_R_SUCCESS); @@ -8843,6 +8850,44 @@ load_configuration(const char *filename, named_server_t *server, isc_nm_settimeouts(named_g_netmgr, initial, idle, keepalive, advertised); +#define CAP_IF_NOT_ZERO(v, min, max) /* clamp non-zero v to [min, max] */ \ + if (v > 0 && v < min) { \ + v = min; \ + } else if (v > max) { \ + v = max; \ + } + + /* 
Set the kernel send and receive buffer sizes */ + obj = NULL; + result = named_config_get(maps, "tcp-receive-buffer", &obj); + INSIST(result == ISC_R_SUCCESS); + recv_tcp_buffer_size = cfg_obj_asuint32(obj); + CAP_IF_NOT_ZERO(recv_tcp_buffer_size, 4096, INT32_MAX); + + obj = NULL; + result = named_config_get(maps, "tcp-send-buffer", &obj); + INSIST(result == ISC_R_SUCCESS); + send_tcp_buffer_size = cfg_obj_asuint32(obj); + CAP_IF_NOT_ZERO(send_tcp_buffer_size, 4096, INT32_MAX); + + obj = NULL; + result = named_config_get(maps, "udp-receive-buffer", &obj); + INSIST(result == ISC_R_SUCCESS); + recv_udp_buffer_size = cfg_obj_asuint32(obj); + CAP_IF_NOT_ZERO(recv_udp_buffer_size, 4096, INT32_MAX); + + obj = NULL; + result = named_config_get(maps, "udp-send-buffer", &obj); + INSIST(result == ISC_R_SUCCESS); + send_udp_buffer_size = cfg_obj_asuint32(obj); + CAP_IF_NOT_ZERO(send_udp_buffer_size, 4096, INT32_MAX); + + isc_nm_setnetbuffers(named_g_netmgr, recv_tcp_buffer_size, + send_tcp_buffer_size, recv_udp_buffer_size, + send_udp_buffer_size); + +#undef CAP_IF_NOT_ZERO + /* * Configure sets of UDP query source ports. */ diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst index 68748252e2..073ec17cd7 100644 --- a/doc/arm/reference.rst +++ b/doc/arm/reference.rst @@ -3585,6 +3585,24 @@ Tuning milliseconds to prefer IPv6 name servers. The default is ``50`` milliseconds. +``tcp-receive-buffer``; ``udp-receive-buffer`` + These options control the operating system receiving network buffer sizes for + TCP and UDP respectively. Buffering on the operating system level can + prevent packet drops during short spikes, but if the value is set too large + it could clog up a running server with outstanding queries that have already + timed out. The default is ``0`` which means to use the operating system + default value. The operating system caps the maximum value that the user can + set here. 
+ +``tcp-send-buffer``; ``udp-send-buffer`` + These options control the operating system sending network buffer sizes for + TCP and UDP respectively. Buffering on the operating system level can + prevent packet drops during short spikes, but if the value is set too large + it could clog up a running server with outstanding queries that have already + timed out. The default is ``0`` which means to use the operating system + default value. The operating system caps the maximum value that the user can + set here. + .. _builtin: Built-in Server Information Zones diff --git a/doc/man/named.conf.5in b/doc/man/named.conf.5in index 35872a5ad1..5a311c30db 100644 --- a/doc/man/named.conf.5in +++ b/doc/man/named.conf.5in @@ -499,6 +499,8 @@ options { tcp\-initial\-timeout integer; tcp\-keepalive\-timeout integer; tcp\-listen\-queue integer; + tcp\-receive\-buffer integer; + tcp\-send\-buffer integer; tkey\-dhkey quoted_string integer; tkey\-domain quoted_string; tkey\-gssapi\-credential quoted_string; @@ -515,6 +517,8 @@ options { transfers\-per\-ns integer; trust\-anchor\-telemetry boolean; // experimental try\-tcp\-refresh boolean; + udp\-receive\-buffer integer; + udp\-send\-buffer integer; update\-check\-ksk boolean; use\-alt\-transfer\-source boolean; use\-v4\-udp\-ports { portrange; ... }; diff --git a/doc/misc/options b/doc/misc/options index 6a035d686e..e42b004c9e 100644 --- a/doc/misc/options +++ b/doc/misc/options @@ -358,6 +358,8 @@ options { tcp-initial-timeout ; tcp-keepalive-timeout ; tcp-listen-queue ; + tcp-receive-buffer ; + tcp-send-buffer ; tkey-dhkey ; tkey-domain ; tkey-gssapi-credential ; @@ -374,6 +376,8 @@ options { transfers-per-ns ; trust-anchor-telemetry ; // experimental try-tcp-refresh ; + udp-receive-buffer ; + udp-send-buffer ; update-check-ksk ; use-alt-transfer-source ; use-v4-udp-ports { ; ... 
}; diff --git a/doc/misc/options.active b/doc/misc/options.active index c8c56ea427..d5adf85a98 100644 --- a/doc/misc/options.active +++ b/doc/misc/options.active @@ -355,6 +355,8 @@ options { tcp-initial-timeout ; tcp-keepalive-timeout ; tcp-listen-queue ; + tcp-receive-buffer ; + tcp-send-buffer ; tkey-dhkey ; tkey-domain ; tkey-gssapi-credential ; @@ -371,6 +373,8 @@ options { transfers-per-ns ; trust-anchor-telemetry ; // experimental try-tcp-refresh ; + udp-receive-buffer ; + udp-send-buffer ; update-check-ksk ; use-alt-transfer-source ; use-v4-udp-ports { ; ... }; diff --git a/doc/misc/options.grammar.rst b/doc/misc/options.grammar.rst index 5a4c4290f6..9ee853c6a9 100644 --- a/doc/misc/options.grammar.rst +++ b/doc/misc/options.grammar.rst @@ -277,6 +277,8 @@ tcp-initial-timeout ; tcp-keepalive-timeout ; tcp-listen-queue ; + tcp-receive-buffer ; + tcp-send-buffer ; tkey-dhkey ; tkey-domain ; tkey-gssapi-credential ; @@ -293,6 +295,8 @@ transfers-per-ns ; trust-anchor-telemetry ; // experimental try-tcp-refresh ; + udp-receive-buffer ; + udp-send-buffer ; update-check-ksk ; use-alt-transfer-source ; use-v4-udp-ports { ; ... }; diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 7ce897786b..89ef001a71 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -424,6 +424,17 @@ isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, * \li 'mgr' is a valid netmgr. */ +void +isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp, + int32_t recv_udp, int32_t send_udp); +/*%< + * If not 0, sets the SO_RCVBUF and SO_SNDBUF socket options for TCP and UDP + * respectively. + * + * Requires: + * \li 'mgr' is a valid netmgr. 
+ */ + void isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, uint32_t *keepalive, uint32_t *advertised); diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index f45116ae41..f131400152 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -709,6 +709,14 @@ struct isc_nm { isc_barrier_t pausing; isc_barrier_t resuming; + /* + * Socket SO_RCVBUF and SO_SNDBUF values (0 = keep the OS default) + */ + atomic_int_fast32_t recv_udp_buffer_size; + atomic_int_fast32_t send_udp_buffer_size; + atomic_int_fast32_t recv_tcp_buffer_size; + atomic_int_fast32_t send_tcp_buffer_size; + #ifdef NETMGR_TRACE ISC_LIST(isc_nmsocket_t) active_sockets; #endif @@ -1769,6 +1777,12 @@ isc__nm_socket_tcp_nodelay(uv_os_sock_t fd); * Disables Nagle's algorithm on a TCP socket (sets TCP_NODELAY). */ +void +isc__nm_set_network_buffers(isc_nm_t *nm, uv_handle_t *handle); +/*%< + * Sets the configured (non-zero) buffer sizes on a TCP or UDP handle. + */ + /* * typedef all the netievent types */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index d61c8c87f7..ac2f38992a 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -615,6 +615,17 @@ isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, atomic_store(&mgr->advertised, advertised); } +void +isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp, + int32_t recv_udp, int32_t send_udp) { + REQUIRE(VALID_NM(mgr)); + + atomic_store(&mgr->recv_tcp_buffer_size, recv_tcp); + atomic_store(&mgr->send_tcp_buffer_size, send_tcp); + atomic_store(&mgr->recv_udp_buffer_size, recv_udp); + atomic_store(&mgr->send_udp_buffer_size, send_udp); +} + void isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, uint32_t *keepalive, uint32_t *advertised) { @@ -3141,6 +3152,40 @@ isc__nm_socket_tcp_nodelay(uv_os_sock_t fd) { #endif } +void +isc__nm_set_network_buffers(isc_nm_t *nm, uv_handle_t *handle) { + int32_t recv_buffer_size = 0; + int32_t send_buffer_size = 0; + 
+ switch (handle->type) { + case UV_TCP: + recv_buffer_size = + atomic_load_relaxed(&nm->recv_tcp_buffer_size); + send_buffer_size = + atomic_load_relaxed(&nm->send_tcp_buffer_size); + break; + case UV_UDP: + recv_buffer_size = + atomic_load_relaxed(&nm->recv_udp_buffer_size); + send_buffer_size = + atomic_load_relaxed(&nm->send_udp_buffer_size); + break; + default: + INSIST(0); + ISC_UNREACHABLE(); + } + + if (recv_buffer_size > 0) { + int r = uv_recv_buffer_size(handle, &recv_buffer_size); + INSIST(r == 0); + } + + if (send_buffer_size > 0) { + int r = uv_send_buffer_size(handle, &send_buffer_size); + INSIST(r == 0); + } +} + #ifdef NETMGR_TRACE /* * Dump all active sockets in netmgr. We output to stderr diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index c7030ecccc..7ea14d29f3 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -158,6 +158,8 @@ tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { } } + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + uv_handle_set_data(&req->uv_req.handle, req); r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, &req->peer.type.sa, tcp_connect_cb); @@ -571,6 +573,8 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { } #endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + /* * The callback will run in the same thread uv_listen() was called * from, so a race with tcp_connection_cb() isn't possible. 
diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index 0d9bd85f99..fe0cfccf04 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -132,6 +132,8 @@ tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { } } + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + uv_handle_set_data(&req->uv_req.handle, req); r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, &req->peer.type.sa, tcpdns_connect_cb); @@ -540,6 +542,8 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { } #endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + /* * The callback will run in the same thread uv_listen() was called * from, so a race with tcpdns_connection_cb() isn't possible. diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c index 212d81539c..afe6a31043 100644 --- a/lib/isc/netmgr/tlsdns.c +++ b/lib/isc/netmgr/tlsdns.c @@ -149,6 +149,8 @@ tlsdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { } } + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + uv_handle_set_data(&req->uv_req.handle, req); r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp, &req->peer.type.sa, tlsdns_connect_cb); @@ -610,6 +612,8 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { } #endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + /* * The callback will run in the same thread uv_listen() was * called from, so a race with tlsdns_connection_cb() isn't diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index b8a40d99f5..2c944b7d82 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -274,14 +274,8 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { } #endif -#ifdef ISC_RECV_BUFFER_SIZE - uv_recv_buffer_size(&sock->uv_handle.handle, - &(int){ ISC_RECV_BUFFER_SIZE }); -#endif -#ifdef ISC_SEND_BUFFER_SIZE - uv_send_buffer_size(&sock->uv_handle.handle, - &(int){ 
ISC_SEND_BUFFER_SIZE }); -#endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); + r = uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb, udp_recv_cb); if (r != 0) { @@ -647,14 +641,7 @@ udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { goto done; } -#ifdef ISC_RECV_BUFFER_SIZE - uv_recv_buffer_size(&sock->uv_handle.handle, - &(int){ ISC_RECV_BUFFER_SIZE }); -#endif -#ifdef ISC_SEND_BUFFER_SIZE - uv_send_buffer_size(&sock->uv_handle.handle, - &(int){ ISC_SEND_BUFFER_SIZE }); -#endif + isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); /* * On FreeBSD the UDP connect() call sometimes results in a diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in index db582009ad..d384592646 100644 --- a/lib/isc/win32/libisc.def.in +++ b/lib/isc/win32/libisc.def.in @@ -471,6 +471,7 @@ isc_nm_stoplistening isc_nm_tcpconnect isc_nm_tcpdnsconnect isc_nm_gettimeouts +isc_nm_setnetbuffers isc_nm_settimeouts isc_nm_tcpdns_keepalive isc_nm_tcpdns_sequential diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index 59c2e57e9f..cf2b67deb7 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -1268,6 +1268,8 @@ static cfg_clausedef_t options_clauses[] = { { "tcp-initial-timeout", &cfg_type_uint32, 0 }, { "tcp-keepalive-timeout", &cfg_type_uint32, 0 }, { "tcp-listen-queue", &cfg_type_uint32, 0 }, + { "tcp-receive-buffer", &cfg_type_uint32, 0 }, + { "tcp-send-buffer", &cfg_type_uint32, 0 }, { "tkey-dhkey", &cfg_type_tkey_dhkey, 0 }, { "tkey-domain", &cfg_type_qstring, 0 }, { "tkey-gssapi-credential", &cfg_type_qstring, 0 }, @@ -1277,6 +1279,8 @@ static cfg_clausedef_t options_clauses[] = { { "transfers-out", &cfg_type_uint32, 0 }, { "transfers-per-ns", &cfg_type_uint32, 0 }, { "treat-cr-as-space", NULL, CFG_CLAUSEFLAG_ANCIENT }, + { "udp-receive-buffer", &cfg_type_uint32, 0 }, + { "udp-send-buffer", &cfg_type_uint32, 0 }, { "use-id-pool", NULL, CFG_CLAUSEFLAG_ANCIENT }, { "use-ixfr", NULL, 
CFG_CLAUSEFLAG_ANCIENT }, { "use-v4-udp-ports", &cfg_type_bracketed_portlist, 0 }, From 3733b4f10129e94898589d9421c1e55ab92034a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Mon, 15 Mar 2021 15:55:44 +0100 Subject: [PATCH 2/2] Add CHANGES and release note for GL #2313 --- CHANGES | 3 +++ doc/notes/notes-current.rst | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 0db8e249c9..fad866c7b3 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +5640. [func] Add new configuration option to set the operating system + receive and send buffers. [GL #2313] + 5639. [bug] Check that the first and last SOA record of an AXFR are consistent. [GL #2528] diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index 000395b127..c4da0ed151 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -24,7 +24,14 @@ Known Issues New Features ~~~~~~~~~~~~ -- None. +- New configuration options, ``tcp-receive-buffer``, ``tcp-send-buffer``, + ``udp-receive-buffer``, and ``udp-send-buffer``, have been added. These + options allow the operator to fine-tune the receiving and sending + buffers in the operating system. On busy servers, increasing the value + of the receive buffers can prevent the server from dropping the packets + during short spikes, and decreasing the value would prevent the server from + becoming clogged up with queries that are too old and have already timed out + on the receiving side. :gl:`#2313` Removed Features ~~~~~~~~~~~~~~~~