mirror of
https://gitlab.isc.org/isc-projects/bind9
synced 2025-08-31 14:35:26 +00:00
Allocate DNS send buffers using dedicated per-worker memory arenas
This commit ensures that memory allocations related to DNS send buffers are routed through dedicated per-worker memory arenas, in order to decrease the memory usage caused by TCP-based DNS transports under high load. We do this by following the jemalloc developers' suggestions: https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1639019699 https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1698173849
This commit is contained in:
committed by
Ondřej Surý
parent
6e98b58d15
commit
01cc7edcca
@@ -334,7 +334,7 @@ client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer,
|
||||
|
||||
if (TCP_CLIENT(client)) {
|
||||
INSIST(client->tcpbuf == NULL);
|
||||
client->tcpbuf = isc_mem_get(client->manager->mctx,
|
||||
client->tcpbuf = isc_mem_get(client->manager->send_mctx,
|
||||
NS_CLIENT_TCP_BUFFER_SIZE);
|
||||
client->tcpbuf_size = NS_CLIENT_TCP_BUFFER_SIZE;
|
||||
data = client->tcpbuf;
|
||||
@@ -371,8 +371,8 @@ client_sendpkg(ns_client_t *client, isc_buffer_t *buffer) {
|
||||
|
||||
if (isc_buffer_base(buffer) == client->tcpbuf) {
|
||||
size_t used = isc_buffer_usedlength(buffer);
|
||||
client->tcpbuf =
|
||||
isc_mem_creget(client->manager->mctx, client->tcpbuf,
|
||||
client->tcpbuf = isc_mem_creget(
|
||||
client->manager->send_mctx, client->tcpbuf,
|
||||
client->tcpbuf_size, used, sizeof(char));
|
||||
client->tcpbuf_size = used;
|
||||
r.base = client->tcpbuf;
|
||||
@@ -449,7 +449,7 @@ ns_client_sendraw(ns_client_t *client, dns_message_t *message) {
|
||||
return;
|
||||
done:
|
||||
if (client->tcpbuf != NULL) {
|
||||
isc_mem_put(client->manager->mctx, client->tcpbuf,
|
||||
isc_mem_put(client->manager->send_mctx, client->tcpbuf,
|
||||
client->tcpbuf_size);
|
||||
}
|
||||
|
||||
@@ -733,7 +733,7 @@ renderend:
|
||||
|
||||
cleanup:
|
||||
if (client->tcpbuf != NULL) {
|
||||
isc_mem_put(client->manager->mctx, client->tcpbuf,
|
||||
isc_mem_put(client->manager->send_mctx, client->tcpbuf,
|
||||
client->tcpbuf_size);
|
||||
}
|
||||
|
||||
@@ -1621,7 +1621,7 @@ ns__client_reset_cb(void *client0) {
|
||||
|
||||
ns_client_endrequest(client);
|
||||
if (client->tcpbuf != NULL) {
|
||||
isc_mem_put(client->manager->mctx, client->tcpbuf,
|
||||
isc_mem_put(client->manager->send_mctx, client->tcpbuf,
|
||||
client->tcpbuf_size);
|
||||
}
|
||||
|
||||
@@ -1658,7 +1658,8 @@ ns__client_put_cb(void *client0) {
|
||||
|
||||
client->magic = 0;
|
||||
|
||||
isc_mem_put(manager->mctx, client->sendbuf, NS_CLIENT_SEND_BUFFER_SIZE);
|
||||
isc_mem_put(manager->send_mctx, client->sendbuf,
|
||||
NS_CLIENT_SEND_BUFFER_SIZE);
|
||||
if (client->opt != NULL) {
|
||||
INSIST(dns_rdataset_isassociated(client->opt));
|
||||
dns_rdataset_disassociate(client->opt);
|
||||
@@ -2308,7 +2309,7 @@ ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) {
|
||||
dns_message_create(client->manager->mctx,
|
||||
DNS_MESSAGE_INTENTPARSE, &client->message);
|
||||
|
||||
client->sendbuf = isc_mem_get(client->manager->mctx,
|
||||
client->sendbuf = isc_mem_get(client->manager->send_mctx,
|
||||
NS_CLIENT_SEND_BUFFER_SIZE);
|
||||
/*
|
||||
* Set magic earlier than usual because ns_query_init()
|
||||
@@ -2356,7 +2357,7 @@ ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) {
|
||||
|
||||
cleanup:
|
||||
if (client->sendbuf != NULL) {
|
||||
isc_mem_put(client->manager->mctx, client->sendbuf,
|
||||
isc_mem_put(client->manager->send_mctx, client->sendbuf,
|
||||
NS_CLIENT_SEND_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
@@ -2392,6 +2393,8 @@ clientmgr_destroy_cb(void *arg) {
|
||||
|
||||
ns_server_detach(&manager->sctx);
|
||||
|
||||
isc_mem_detach(&manager->send_mctx);
|
||||
|
||||
isc_mem_putanddetach(&manager->mctx, manager, sizeof(*manager));
|
||||
}
|
||||
|
||||
@@ -2424,6 +2427,61 @@ ns_clientmgr_create(ns_server_t *sctx, isc_loopmgr_t *loopmgr,
|
||||
isc_refcount_init(&manager->references, 1);
|
||||
ns_server_attach(sctx, &manager->sctx);
|
||||
|
||||
/*
|
||||
* We create specialised per-worker memory context specifically
|
||||
* dedicated and tuned for allocating send buffers as it is a very
|
||||
* common operation. Not doing so may result in excessive memory
|
||||
* use in certain workloads.
|
||||
*
|
||||
* Please see this thread for more details:
|
||||
*
|
||||
* https://github.com/jemalloc/jemalloc/issues/2483
|
||||
*
|
||||
* In particular, this information from the jemalloc developers is
|
||||
* of the most interest:
|
||||
*
|
||||
* https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1639019699
|
||||
* https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1698173849
|
||||
*
|
||||
* In essence, we use the following memory management strategy:
|
||||
*
|
||||
* 1. We use a per-worker memory arena for send buffers memory
|
||||
* allocation to reduce lock contention (In reality, we create a
|
||||
* per-client manager arena, but we have one client manager per
|
||||
* worker).
|
||||
*
|
||||
* 2. The automatically created arenas settings remain unchanged
|
||||
* and may be controlled by users (e.g. by setting the
|
||||
* "MALLOC_CONF" variable).
|
||||
*
|
||||
* 3. We attune the arenas to not use dirty pages cache as the
|
||||
* cache would have a poor reuse rate, and that is known to
|
||||
* significantly contribute to excessive memory use.
|
||||
*
|
||||
* 4. There is no strict need for the dirty cache, as there is a
|
||||
* per arena bin for each allocation size, so because we initially
|
||||
* allocate strictly 64K per send buffer (enough for a DNS
|
||||
* message), allocations would get directed to one bin (an "object
|
||||
* pool" or a "slab") maintained within an arena. That is, there
|
||||
* is an object pool already, specifically to optimise for the
|
||||
* case of frequent allocations of objects of the given size. The
|
||||
* object pool should suffice our needs, as we will end up
|
||||
* recycling the objects from there without the need to back it by
|
||||
* an additional layer of dirty pages cache. The dirty pages cache
|
||||
* would have worked better in the case when there are more
|
||||
* allocation bins involved due to a higher reuse rate (the case
|
||||
* of a more "generic" memory management).
|
||||
*/
|
||||
isc_mem_create_arena(&manager->send_mctx);
|
||||
isc_mem_setname(manager->send_mctx, "sendbufs");
|
||||
(void)isc_mem_arena_set_dirty_decay_ms(manager->send_mctx, 0);
|
||||
/*
|
||||
* Disable muzzy pages cache too, as versions < 5.2.0 have it
|
||||
* enabled by default. The muzzy pages cache goes right below the
|
||||
* dirty pages cache and backs it.
|
||||
*/
|
||||
(void)isc_mem_arena_set_muzzy_decay_ms(manager->send_mctx, 0);
|
||||
|
||||
manager->magic = MANAGER_MAGIC;
|
||||
|
||||
MTRACE("create");
|
||||
|
@@ -144,6 +144,7 @@ struct ns_clientmgr {
|
||||
unsigned int magic;
|
||||
|
||||
isc_mem_t *mctx;
|
||||
isc_mem_t *send_mctx;
|
||||
ns_server_t *sctx;
|
||||
isc_refcount_t references;
|
||||
uint32_t tid;
|
||||
|
@@ -55,6 +55,11 @@ ns_server_create(isc_mem_t *mctx, ns_matchview_t matchingview,
|
||||
|
||||
isc_mem_attach(mctx, &sctx->mctx);
|
||||
|
||||
/*
|
||||
* See here for more details:
|
||||
* https://github.com/jemalloc/jemalloc/issues/2483
|
||||
*/
|
||||
|
||||
isc_refcount_init(&sctx->references, 1);
|
||||
|
||||
isc_quota_init(&sctx->xfroutquota, 10);
|
||||
|
Reference in New Issue
Block a user