bind/lib/isc/quota.c
Ondřej Surý 634bdfb16d Refactor netmgr and add more unit tests
This is part of the work intended to make the netmgr stable, testable,
maintainable and tested.  It contains numerous changes to the netmgr
code and, unfortunately, it was not possible to split this into smaller
chunks, as the work needs to be committed as a complete unit.

NOTE: There is quite a lot of duplicated code between udp.c, tcp.c and
tcpdns.c; it should be a subject for refactoring in the future.

The changes that are included in this commit are listed here
(extensively, but not exclusively):

* The netmgr_test unit test was split into individual tests (udp_test,
  tcp_test, tcpdns_test and the newly added tcp_quota_test)

* The udp_test and tcp_test have been extended to allow programmatic
  failures from the libuv API.  Unfortunately, we can't use cmocka's
  mock() and will_return(), so we emulate that behaviour with #define
  and by including the netmgr/{udp,tcp}.c source files directly (see
  the sketch below).
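
  A minimal sketch of this technique, assuming a test file inside the
  BIND source tree; test_uv_udp_bind and udp_bind_errno are names
  invented here for illustration:

      #include <uv.h>

      static int udp_bind_errno = 0; /* a test sets this to force a failure */

      static int
      test_uv_udp_bind(uv_udp_t *handle, const struct sockaddr *addr,
                       unsigned int flags) {
              if (udp_bind_errno != 0) {
                      return (udp_bind_errno); /* simulated libuv error */
              }
              return (uv_udp_bind(handle, addr, flags));
      }

      /* Redirect every uv_udp_bind() call in the code under test. */
      #define uv_udp_bind test_uv_udp_bind
      #include "netmgr/udp.c"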

* The netievents that we put on the nm queue have a variable number of
  members; of these, the isc_nmsocket_t and isc_nmhandle_t always need
  to be attached before enqueueing the netievent_<foo> and detached
  after isc_nm_async_<foo> has been called, to ensure that the socket
  (handle) doesn't disappear between scheduling the event and actually
  executing it (a miniature of this rule follows).
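
  A self-contained miniature of the lifetime rule, with all names
  invented for illustration (the real netmgr types are more involved):

      #include <stdatomic.h>
      #include <stdlib.h>

      typedef struct sock {
              atomic_uint references;
      } sock_t;

      typedef struct event {
              sock_t *sock; /* reference held while the event is queued */
      } event_t;

      static void
      sock_attach(sock_t *s, sock_t **target) {
              atomic_fetch_add(&s->references, 1);
              *target = s;
      }

      static void
      sock_detach(sock_t **sp) {
              sock_t *s = *sp;
              *sp = NULL;
              if (atomic_fetch_sub(&s->references, 1) == 1) {
                      free(s); /* last reference gone */
              }
      }

      static void
      enqueue_foo(sock_t *s, event_t *ev) {
              sock_attach(s, &ev->sock); /* before enqueueing */
              /* ... put ev on the netthread's queue ... */
      }

      static void
      async_foo(event_t *ev) {
              /* ... the isc_nm_async_<foo> equivalent runs here ... */
              sock_detach(&ev->sock); /* after the handler has run */
      }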

* Cancelling an in-flight TCP connection using libuv requires calling
  uv_close() on the original uv_tcp_t handle, which breaks too many
  assumptions in the netmgr code.  Instead of using a uv_timer for TCP
  connection timeouts, we use a platform-specific socket option (a
  sketch follows).
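
  A hedged sketch of the socket-option approach; the exact option
  differs per platform, and only the Linux TCP_USER_TIMEOUT variant is
  shown here:

      #include <netinet/in.h>
      #include <netinet/tcp.h>
      #include <sys/socket.h>

      /* Ask the kernel to abort the connection after timeout_ms
       * milliseconds of unacknowledged data, instead of arming a
       * uv_timer in userspace. */
      static int
      set_tcp_timeout(int fd, unsigned int timeout_ms) {
      #if defined(TCP_USER_TIMEOUT)
              return (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
                                 &timeout_ms, sizeof(timeout_ms)));
      #else
              (void)fd;
              (void)timeout_ms;
              return (0); /* no kernel support: nothing to set */
      #endif
      }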

* Fix the synchronization between {nm,async}_{listentcp,tcpconnect}

  When isc_nm_listentcp() or isc_nm_tcpconnect() was called, the
  calling thread used a condition variable and mutex to wait for the
  socket either to end up in an error state (that path was fine) or to
  become listening or connected.

  Several things could happen:

    0. everything is OK

    1. the waiting thread could miss the SIGNAL(), because the
       enqueued event was processed before the thread even started to
       WAIT().  If the operation ended with an error this was still
       fine, as the error variable would be unchanged.

    2. the waiting thread could miss sock->{connected,listening} being
       set to `true`, because it had already been reset to `false` in
       tcp_{listen,connect}close_cb(): the connection could be so
       short-lived that the socket was closed before the thread even
       started to WAIT().

  A generic sketch of the usual remedy for this class of lost-wakeup
  race follows this list.
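
  The fix is to latch the outcome in state that is only ever set, and
  to have the waiter re-check that state under the mutex; this is a
  textbook illustration, not the literal netmgr code:

      #include <pthread.h>
      #include <stdbool.h>

      typedef struct {
              pthread_mutex_t lock;
              pthread_cond_t cond;
              bool done;  /* latched: set once, never cleared */
              int result; /* outcome recorded by the event */
      } waiter_t;

      static void
      waiter_signal(waiter_t *w, int result) {
              pthread_mutex_lock(&w->lock);
              w->result = result;
              w->done = true; /* latch before signalling */
              pthread_cond_signal(&w->cond);
              pthread_mutex_unlock(&w->lock);
      }

      static int
      waiter_wait(waiter_t *w) {
              pthread_mutex_lock(&w->lock);
              /* A signal sent before we got here is not lost: the
               * predicate is re-checked under the lock. */
              while (!w->done) {
                      pthread_cond_wait(&w->cond, &w->lock);
              }
              pthread_mutex_unlock(&w->lock);
              return (w->result);
      }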

* The tcpdns protocol has been converted to use libuv directly.
  Previously, tcpdns was layered on top of the netmgr tcp protocol,
  which proved very complicated to understand, fix and modify.  The
  new tcpdns protocol is modeled on the tcp netmgr protocol.
  Closes: #2194, #2283, #2318, #2266, #2034, #1920

* The tcp and tcpdns protocols no longer use isc_uv_import/isc_uv_export
  to pass accepted TCP sockets between netthreads; instead (similarly
  to UDP) each netthread runs its own uv_loop listener.  This greatly
  reduces complexity, as a socket always runs in its associated nm and
  uv loops, and we are also no longer touching the libuv internals.

  There's an unfortunate side effect, though: the new code requires
  operating-system support for load-balanced sockets for both UDP and
  TCP (see #2137).  If the operating system doesn't support
  load-balanced sockets (either SO_REUSEPORT on Linux or
  SO_REUSEPORT_LB on FreeBSD 12+; see the sketch below), the number of
  netthreads is limited to 1.
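
  A hedged sketch of how such a load-balanced listener is typically
  enabled (illustrative, not the literal netmgr code):

      #include <sys/socket.h>

      /* Let every netthread bind its own socket to the same address;
       * the kernel then load-balances incoming traffic across them. */
      static int
      set_load_balancing(int fd) {
              int on = 1;
      #if defined(SO_REUSEPORT_LB) /* FreeBSD 12+ */
              return (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB,
                                 &on, sizeof(on)));
      #elif defined(SO_REUSEPORT) /* Linux */
              return (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT,
                                 &on, sizeof(on)));
      #else
              (void)fd;
              return (-1); /* unsupported: limit netthreads to 1 */
      #endif
      }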

* The netmgr now has two debugging #ifdefs:

  1. The already existing NETMGR_TRACE prints any dangling nmsockets
     and nmhandles before triggering an assertion failure.  This
     option reduces performance when enabled, but in theory it could
     be enabled on low-performance systems.

  2. A new NETMGR_TRACE_VERBOSE option has been added that enables
     extensive netmgr logging, allowing a software engineer to
     precisely track any attach/detach operations on the nmsockets and
     nmhandles.  This is not suitable for any kind of production
     machine, only for debugging.

* The tlsdns netmgr protocol has been split from tcpdns; it still uses
  the old method of stacking the netmgr boxes on top of each other.
  We will have to refactor the tlsdns netmgr protocol to use the same
  approach: build the stack using only libuv and OpenSSL.

* Limit, but do not assert, the TCP buffer size in tcp_alloc_cb (a
  sketch of the clamping follows).
  Closes: #2061
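
  A hedged sketch of a libuv allocation callback that clamps the
  suggested size instead of asserting on it; the 65536 cap and the
  plain malloc() are illustrative assumptions:

      #include <stdlib.h>
      #include <uv.h>

      static void
      tcp_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) {
              (void)handle;
              if (size > 65536) {
                      size = 65536; /* limit instead of INSIST()ing */
              }
              buf->base = malloc(size);
              buf->len = size;
      }
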
2020-12-01 16:47:07 +01:00


/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */
/*! \file */

#include <stddef.h>

#include <isc/atomic.h>
#include <isc/quota.h>
#include <isc/util.h>

#define QUOTA_MAGIC    ISC_MAGIC('Q', 'U', 'O', 'T')
#define VALID_QUOTA(p) ISC_MAGIC_VALID(p, QUOTA_MAGIC)

#define QUOTA_CB_MAGIC    ISC_MAGIC('Q', 'T', 'C', 'B')
#define VALID_QUOTA_CB(p) ISC_MAGIC_VALID(p, QUOTA_CB_MAGIC)
void
isc_quota_init(isc_quota_t *quota, unsigned int max) {
	atomic_init(&quota->max, max);
	atomic_init(&quota->used, 0);
	atomic_init(&quota->soft, 0);
	atomic_init(&quota->waiting, 0);
	ISC_LIST_INIT(quota->cbs);
	isc_mutex_init(&quota->cblock);
	quota->magic = QUOTA_MAGIC;
}

void
isc_quota_destroy(isc_quota_t *quota) {
	REQUIRE(VALID_QUOTA(quota));
	quota->magic = 0;

	INSIST(atomic_load(&quota->used) == 0);
	INSIST(atomic_load(&quota->waiting) == 0);
	INSIST(ISC_LIST_EMPTY(quota->cbs));

	atomic_store_release(&quota->max, 0);
	atomic_store_release(&quota->used, 0);
	atomic_store_release(&quota->soft, 0);
	isc_mutex_destroy(&quota->cblock);
}

void
isc_quota_soft(isc_quota_t *quota, unsigned int soft) {
	REQUIRE(VALID_QUOTA(quota));
	atomic_store_release(&quota->soft, soft);
}

void
isc_quota_max(isc_quota_t *quota, unsigned int max) {
	REQUIRE(VALID_QUOTA(quota));
	atomic_store_release(&quota->max, max);
}

unsigned int
isc_quota_getmax(isc_quota_t *quota) {
	REQUIRE(VALID_QUOTA(quota));
	return (atomic_load_relaxed(&quota->max));
}

unsigned int
isc_quota_getsoft(isc_quota_t *quota) {
	REQUIRE(VALID_QUOTA(quota));
	return (atomic_load_relaxed(&quota->soft));
}

unsigned int
isc_quota_getused(isc_quota_t *quota) {
	REQUIRE(VALID_QUOTA(quota));
	return (atomic_load_relaxed(&quota->used));
}
static isc_result_t
quota_reserve(isc_quota_t *quota) {
	isc_result_t result;
	uint_fast32_t max = atomic_load_acquire(&quota->max);
	uint_fast32_t soft = atomic_load_acquire(&quota->soft);
	uint_fast32_t used = atomic_load_acquire(&quota->used);
	do {
		if (max != 0 && used >= max) {
			return (ISC_R_QUOTA);
		}
		if (soft != 0 && used >= soft) {
			result = ISC_R_SOFTQUOTA;
		} else {
			result = ISC_R_SUCCESS;
		}
		/*
		 * If the CAS fails, 'used' is reloaded with the current
		 * value and the limits are re-checked on the next pass.
		 */
	} while (!atomic_compare_exchange_weak_acq_rel(&quota->used, &used,
						       used + 1));
	return (result);
}
/* Must be called with quota->cblock locked */
static void
enqueue(isc_quota_t *quota, isc_quota_cb_t *cb) {
	REQUIRE(cb != NULL);
	ISC_LIST_ENQUEUE(quota->cbs, cb, link);
	atomic_fetch_add_release(&quota->waiting, 1);
}

/* Must be called with quota->cblock locked */
static isc_quota_cb_t *
dequeue(isc_quota_t *quota) {
	isc_quota_cb_t *cb = ISC_LIST_HEAD(quota->cbs);
	INSIST(cb != NULL);
	ISC_LIST_DEQUEUE(quota->cbs, cb, link);
	atomic_fetch_sub_relaxed(&quota->waiting, 1);
	return (cb);
}
static void
quota_release(isc_quota_t *quota) {
	/*
	 * This is opportunistic - we might race with a failing
	 * quota_attach_cb and not detect that something is waiting,
	 * but eventually someone will be releasing the quota and will
	 * detect it, so we don't need to worry - and we're saving a
	 * lot by not locking cblock every time.
	 */
	if (atomic_load_acquire(&quota->waiting) > 0) {
		isc_quota_cb_t *cb = NULL;
		LOCK(&quota->cblock);
		if (atomic_load_relaxed(&quota->waiting) > 0) {
			cb = dequeue(quota);
		}
		UNLOCK(&quota->cblock);
		if (cb != NULL) {
			/*
			 * The quota slot is handed over to the waiter;
			 * 'used' is deliberately not decremented here.
			 */
			cb->cb_func(quota, cb->data);
			return;
		}
	}
	INSIST(atomic_fetch_sub_release(&quota->used, 1) > 0);
}
static isc_result_t
doattach(isc_quota_t *quota, isc_quota_t **p) {
	isc_result_t result;
	REQUIRE(p != NULL && *p == NULL);

	result = quota_reserve(quota);
	if (result == ISC_R_SUCCESS || result == ISC_R_SOFTQUOTA) {
		*p = quota;
	}

	return (result);
}

isc_result_t
isc_quota_attach(isc_quota_t *quota, isc_quota_t **quotap) {
	REQUIRE(VALID_QUOTA(quota));
	REQUIRE(quotap != NULL && *quotap == NULL);

	return (isc_quota_attach_cb(quota, quotap, NULL));
}

isc_result_t
isc_quota_attach_cb(isc_quota_t *quota, isc_quota_t **quotap,
		    isc_quota_cb_t *cb) {
	REQUIRE(VALID_QUOTA(quota));
	REQUIRE(cb == NULL || VALID_QUOTA_CB(cb));
	REQUIRE(quotap != NULL && *quotap == NULL);

	isc_result_t result = doattach(quota, quotap);
	if (result == ISC_R_QUOTA && cb != NULL) {
		LOCK(&quota->cblock);
		enqueue(quota, cb);
		UNLOCK(&quota->cblock);
	}
	return (result);
}
void
isc_quota_cb_init(isc_quota_cb_t *cb, isc_quota_cb_func_t cb_func, void *data) {
	ISC_LINK_INIT(cb, link);
	cb->cb_func = cb_func;
	cb->data = data;
	cb->magic = QUOTA_CB_MAGIC;
}

void
isc_quota_detach(isc_quota_t **quotap) {
	REQUIRE(quotap != NULL && VALID_QUOTA(*quotap));
	isc_quota_t *quota = *quotap;
	*quotap = NULL;

	quota_release(quota);
}
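
A hedged usage sketch of the callback-based attach API above (not part
of the original file; quota_ready and example_attach are names invented
for illustration).  A caller tries to reserve a slot; if the hard limit
is reached, the callback is queued and later invoked by quota_release(),
which hands the freed slot directly to the waiter:

    #include <stdio.h>

    #include <isc/quota.h>
    #include <isc/result.h>

    /* Must outlive the wait, so it cannot live on example_attach's stack. */
    static isc_quota_cb_t cb;

    static void
    quota_ready(isc_quota_t *quota, void *data) {
            /*
             * quota_release() transferred its slot to us, so we now
             * hold a reference and must detach when we are done.
             */
            isc_quota_t *ref = quota;
            printf("resuming %s\n", (const char *)data);
            isc_quota_detach(&ref);
    }

    static void
    example_attach(isc_quota_t *quota) {
            isc_quota_t *ref = NULL;

            isc_quota_cb_init(&cb, quota_ready, "client-0");
            switch (isc_quota_attach_cb(quota, &ref, &cb)) {
            case ISC_R_SUCCESS:   /* got a slot */
            case ISC_R_SOFTQUOTA: /* got a slot, but over the soft limit */
                    isc_quota_detach(&ref);
                    break;
            case ISC_R_QUOTA:
                    /* hard limit reached: quota_ready() runs later */
                    break;
            default:
                    break;
            }
    }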