2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-29 05:28:00 +00:00
bind/lib/isc/netmgr/netmgr-int.h
Witold Kręcicki 70397f9d92 netmgr: libuv-based network manager
This is a replacement for the existing isc_socket and isc_socketmgr
implementation. It uses libuv for asynchronous network communication;
"networker" objects will be distributed across worker threads reading
incoming packets and sending them for processing.

UDP listener sockets automatically create an array of "child" sockets
so each worker can listen separately.

TCP sockets are shared amongst worker threads.

A TCPDNS socket is a wrapper around a TCP socket, which handles the
the two-byte length field at the beginning of DNS messages over TCP.

(Other wrapper socket types can be implemented in the future to handle
DNS over TLS, DNS over HTTPS, etc.)
2019-11-07 11:55:37 -08:00

548 lines
14 KiB
C

/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <unistd.h>
#include <uv.h>
#include <isc/astack.h>
#include <isc/atomic.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/queue.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/sockaddr.h>
#include <isc/thread.h>
#include <isc/util.h>
#define ISC_NETMGR_TID_UNKNOWN -1
#define ISC_NETMGR_TID_NOTLS -2
/*
* Single network event loop worker.
*/
typedef struct isc__networker {
isc_nm_t * mgr;
int id; /* thread id */
uv_loop_t loop; /* libuv loop structure */
uv_async_t async; /* async channel to send
* data to this networker */
isc_mutex_t lock;
isc_mempool_t *mpool_bufs;
isc_condition_t cond;
bool paused;
bool finished;
isc_thread_t thread;
isc_queue_t *ievents; /* incoming async events */
isc_refcount_t references;
atomic_int_fast64_t pktcount;
char udprecvbuf[65536];
bool udprecvbuf_inuse;
} isc__networker_t;
/*
* A general handle for a connection bound to a networker. For UDP
* connections we have peer address here, so both TCP and UDP can be
* handled with a simple send-like function
*/
#define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D')
#define VALID_NMHANDLE(t) ISC_MAGIC_VALID(t, \
NMHANDLE_MAGIC)
typedef void (*isc__nm_closecb)(isc_nmhandle_t *);
struct isc_nmhandle {
int magic;
isc_refcount_t references;
/*
* The socket is not 'attached' in the traditional
* reference-counting sense. Instead, we keep all handles in an
* array in the socket object. This way, we don't have circular
* dependencies and we can close all handles when we're destroying
* the socket.
*/
isc_nmsocket_t *sock;
size_t ah_pos; /* Position in the socket's
* 'active handles' array */
/*
* The handle is 'inflight' if netmgr is not currently processing
* it in any way - it might mean that e.g. a recursive resolution
* is happening. For an inflight handle we must wait for the
* calling code to finish before we can free it.
*/
atomic_bool inflight;
isc_sockaddr_t peer;
isc_sockaddr_t local;
isc_nm_opaquecb doreset; /* reset extra callback, external */
isc_nm_opaquecb dofree; /* free extra callback, external */
void * opaque;
char extra[];
};
/*
* An interface - an address we can listen on.
*/
struct isc_nmiface {
isc_sockaddr_t addr;
};
typedef enum isc__netievent_type {
netievent_stop,
netievent_udplisten,
netievent_udpstoplisten,
netievent_udpsend,
netievent_udprecv,
netievent_tcpconnect,
netievent_tcpsend,
netievent_tcprecv,
netievent_tcpstartread,
netievent_tcppauseread,
netievent_tcplisten,
netievent_tcpstoplisten,
netievent_tcpclose,
} isc__netievent_type;
typedef struct isc__netievent_stop {
isc__netievent_type type;
} isc__netievent_stop_t;
/*
* We have to split it because we can read and write on a socket
* simultaneously.
*/
typedef union {
isc_nm_recv_cb_t recv;
isc_nm_cb_t accept;
} isc__nm_readcb_t;
typedef union {
isc_nm_cb_t send;
isc_nm_cb_t connect;
} isc__nm_writecb_t;
typedef union {
isc_nm_recv_cb_t recv;
isc_nm_cb_t accept;
isc_nm_cb_t send;
isc_nm_cb_t connect;
} isc__nm_cb_t;
/*
* Wrapper around uv_req_t with 'our' fields in it. req->data should
* always point to its parent. Note that we always allocate more than
* sizeof(struct) because we make room for different req types;
*/
#define UVREQ_MAGIC ISC_MAGIC('N', 'M', 'U', 'R')
#define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC)
typedef struct isc__nm_uvreq {
int magic;
isc_nmsocket_t * sock;
isc_nmhandle_t * handle;
uv_buf_t uvbuf; /* translated isc_region_t, to be
sent or received */
isc_sockaddr_t local; /* local address */
isc_sockaddr_t peer; /* peer address */
isc__nm_cb_t cb; /* callback */
void * cbarg; /* callback argument */
union {
uv_req_t req;
uv_getaddrinfo_t getaddrinfo;
uv_getnameinfo_t getnameinfo;
uv_shutdown_t shutdown;
uv_write_t write;
uv_connect_t connect;
uv_udp_send_t udp_send;
uv_fs_t fs;
uv_work_t work;
} uv_req;
} isc__nm_uvreq_t;
typedef struct isc__netievent__socket {
isc__netievent_type type;
isc_nmsocket_t *sock;
} isc__netievent__socket_t;
typedef isc__netievent__socket_t isc__netievent_udplisten_t;
typedef isc__netievent__socket_t isc__netievent_udpstoplisten_t;
typedef isc__netievent__socket_t isc__netievent_tcpstoplisten_t;
typedef isc__netievent__socket_t isc__netievent_tcpclose_t;
typedef isc__netievent__socket_t isc__netievent_startread_t;
typedef isc__netievent__socket_t isc__netievent_pauseread_t;
typedef isc__netievent__socket_t isc__netievent_resumeread_t;
typedef struct isc__netievent__socket_req {
isc__netievent_type type;
isc_nmsocket_t *sock;
isc__nm_uvreq_t *req;
} isc__netievent__socket_req_t;
typedef isc__netievent__socket_req_t isc__netievent_tcpconnect_t;
typedef isc__netievent__socket_req_t isc__netievent_tcplisten_t;
typedef isc__netievent__socket_req_t isc__netievent_tcpsend_t;
typedef struct isc__netievent_udpsend {
isc__netievent_type type;
isc_nmsocket_t *sock;
isc_sockaddr_t peer;
isc__nm_uvreq_t *req;
} isc__netievent_udpsend_t;
typedef struct isc__netievent {
isc__netievent_type type;
} isc__netievent_t;
typedef union {
isc__netievent_t ni;
isc__netievent_stop_t nis;
isc__netievent_udplisten_t niul;
isc__netievent_udpsend_t nius;
} isc__netievent_storage_t;
/*
* Network manager
*/
#define NM_MAGIC ISC_MAGIC('N', 'E', 'T', 'M')
#define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC)
struct isc_nm {
int magic;
isc_refcount_t references;
isc_mem_t *mctx;
uint32_t nworkers;
isc_mutex_t lock;
isc_condition_t wkstatecond;
isc__networker_t *workers;
atomic_uint_fast32_t workers_running;
atomic_uint_fast32_t workers_paused;
atomic_uint_fast32_t maxudp;
atomic_bool paused;
/*
* A worker is actively waiting for other workers, for example to
* stop listening; that means no other thread can do the same thing
* or pause, or we'll deadlock. We have to either re-enqueue our
* event or wait for the other one to finish if we want to pause.
*/
atomic_bool interlocked;
};
typedef enum isc_nmsocket_type {
isc_nm_udpsocket,
isc_nm_udplistener, /* Aggregate of nm_udpsocks */
isc_nm_tcpsocket,
isc_nm_tcplistener,
isc_nm_tcpdnslistener,
isc_nm_tcpdnssocket
} isc_nmsocket_type;
/*%
* A universal structure for either a single socket or a group of
* dup'd/SO_REUSE_PORT-using sockets listening on the same interface.
*/
#define NMSOCK_MAGIC ISC_MAGIC('N', 'M', 'S', 'K')
#define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC)
struct isc_nmsocket {
/*% Unlocked, RO */
int magic;
int tid;
isc_nmsocket_type type;
isc_nm_t *mgr;
isc_nmsocket_t *parent;
isc_quota_t *quota;
bool overquota;
/*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */
isc_nmsocket_t *outer;
/*% server socket for connections */
isc_nmsocket_t *server;
/*% children sockets for multi-socket setups */
isc_nmsocket_t *children;
int nchildren;
isc_nmiface_t *iface;
isc_nmhandle_t *tcphandle;
/*% extra data allocated at the end of each isc_nmhandle_t */
size_t extrahandlesize;
/*% libuv data */
uv_os_sock_t fd;
union uv_any_handle uv_handle;
isc_sockaddr_t peer;
/* Atomic */
/*% Number of running (e.g. listening) children sockets */
atomic_int_fast32_t rchildren;
/*%
* Socket if active if it's listening, working, etc., if we're
* closing a socket it doesn't make any sense to e.g. still
* push handles or reqs for reuse
*/
atomic_bool active;
atomic_bool destroying;
/*%
* Socket is closed if it's not active and all the possible
* callbacks were fired, there are no active handles, etc.
* active==false, closed==false means the socket is closing.
*/
atomic_bool closed;
atomic_bool listening;
isc_refcount_t references;
/*%
* TCPDNS socket is not pipelining.
*/
atomic_bool sequential;
/*%
* TCPDNS socket in sequential mode is currently processing a packet,
* we need to wait until it finishes.
*/
atomic_bool processing;
/*%
* 'spare' handles for that can be reused to avoid allocations,
* for UDP.
*/
isc_astack_t *inactivehandles;
isc_astack_t *inactivereqs;
/* Used for active/rchildren during shutdown */
isc_mutex_t lock;
isc_condition_t cond;
/*%
* List of active handles.
* ah_size - size of ah_frees and ah_handles
* ah_cpos - current position in ah_frees;
* ah_handles - array of *handles.
* Adding a handle
* - if ah_cpos == ah_size, realloc
* - x = ah_frees[ah_cpos]
* - ah_frees[ah_cpos++] = 0;
* - ah_handles[x] = handle
* - x must be stored with the handle!
* Removing a handle:
* - ah_frees[--ah_cpos] = x
* - ah_handles[x] = NULL;
*
* XXXWPK for now this is locked with socket->lock, but we might want
* to change it to something lockless
*/
size_t ah_size;
size_t ah_cpos;
size_t *ah_frees;
isc_nmhandle_t **ah_handles;
/* Buffer for TCPDNS processing, optional */
size_t buf_size;
size_t buf_len;
unsigned char *buf;
isc__nm_readcb_t rcb;
void *rcbarg;
};
bool
isc__nm_in_netthread(void);
/*%
* Returns 'true' if we're in the network thread.
*/
void *
isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type);
/*%<
* Allocate an ievent and set the type.
*/
void
isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
/*%<
* Enqueue an ievent onto a specific worker queue. (This the only safe
* way to use an isc__networker_t from another thread.)
*/
void
isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf);
/*%<
* Allocator for recv operations.
*
* Note that as currently implemented, this doesn't actually
* allocate anything, it just assigns the the isc__networker's UDP
* receive buffer to a socket, and marks it as "in use".
*/
void
isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf);
/*%<
* Free a buffer allocated for a receive operation.
*
* Note that as currently implemented, this doesn't actually
* free anything, marks the isc__networker's UDP receive buffer
* as "not in use".
*/
isc_nmhandle_t *
isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
isc_sockaddr_t *local);
/*%<
* Get a handle for the socket 'sock', allocating a new one
* if there isn't one availbale in 'sock->inactivehandles'.
*
* If 'peer' is not NULL, set the handle's peer address to 'peer',
* otherwise set it to 'sock->peer'.
*
* If 'local' is not NULL, set the handle's local address to 'local',
* otherwise set it to 'sock->iface->addr'.
*/
isc__nm_uvreq_t *
isc__nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock);
/*%<
* Get a UV request structure for the socket 'sock', allocating a
* new one if there isn't one availbale in 'sock->inactivereqs'.
*/
void
isc__nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock);
/*%<
* Completes the use of a UV request structure, setting '*req' to NULL.
*
* The UV request is pushed onto the 'sock->inactivereqs' stack or,
* if that doesn't work, freed.
*/
void
isc__nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr,
isc_nmsocket_type type);
/*%<
* Initialize socket 'sock', attach it to 'mgr', and set it to type 'type'.
*/
void
isc__nmsocket_prep_destroy(isc_nmsocket_t *sock);
/*%<
* Market 'sock' as inactive, close it if necessary, and destroy it
* if there are no remaining references or active handles.
*/
isc_result_t
isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region,
isc_nm_cb_t cb, void *cbarg);
/*%<
* Back-end implemenation of isc_nm_send() for UDP handles.
*/
void
isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ievent0);
void
isc__nm_async_udpstoplisten(isc__networker_t *worker,
isc__netievent_t *ievent0);
void
isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ievent0);
/*%<
* Callback handlers for asynchronous UDP events (listen, stoplisten, send).
*/
isc_result_t
isc__nm_tcp_send(isc_nmhandle_t *handle, isc_region_t *region,
isc_nm_cb_t cb, void *cbarg);
/*%<
* Back-end implemenation of isc_nm_send() for TCP handles.
*/
void
isc__nm_tcp_close(isc_nmsocket_t *sock);
/*%<
* Close a TCP socket.
*/
void
isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ievent0);
void
isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ievent0);
void
isc__nm_async_tcpstoplisten(isc__networker_t *worker,
isc__netievent_t *ievent0);
void
isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ievent0);
void
isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ievent0);
void
isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ievent0);
void
isc__nm_async_resumeread(isc__networker_t *worker, isc__netievent_t *ievent0);
void
isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ievent0);
/*%<
* Callback handlers for asynchronous TCP events (connect, listen,
* stoplisten, send, read, pauseread, resumeread, close).
*/
isc_result_t
isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
isc_nm_cb_t cb, void *cbarg);
/*%<
* Back-end implemenation of isc_nm_send() for TCPDNS handles.
*/
void
isc__nm_tcpdns_close(isc_nmsocket_t *sock);
/*%<
* Close a TCPDNS socket.
*/
#define isc__nm_uverr2result(x) \
isc___nm_uverr2result(x, true, __FILE__, __LINE__)
isc_result_t
isc___nm_uverr2result(int uverr, bool dolog,
const char *file, unsigned int line);
/*%<
* Convert a libuv error value into an isc_result_t. The
* list of supported error values is not complete; new users
* of this function should add any expected errors that are
* not already there.
*/
bool
isc__nm_acquire_interlocked(isc_nm_t *mgr);
/*%<
* Try to acquire interlocked state; return true if successful.
*/
void
isc__nm_drop_interlocked(isc_nm_t *mgr);
/*%<
* Drop interlocked state; signal waiters.
*/
void
isc__nm_acquire_interlocked_force(isc_nm_t *mgr);
/*%<
* Actively wait for interlocked state.
*/