2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-09-02 15:45:25 +00:00

netmgr: TCP improvements

- add timeout support for TCP and TCPDNS connections to protect against
  slowloris style attacks. currently, all timeouts are hard-coded.
- rework and simplify the TCPDNS state machine.
This commit is contained in:
Witold Kręcicki
2019-11-19 11:56:00 +01:00
committed by Evan Hunt
parent 751ad12dea
commit b7a72b1667
4 changed files with 357 additions and 255 deletions

View File

@@ -116,6 +116,7 @@ typedef enum isc__netievent_type {
netievent_tcplisten, netievent_tcplisten,
netievent_tcpstoplisten, netievent_tcpstoplisten,
netievent_tcpclose, netievent_tcpclose,
netievent_closecb,
} isc__netievent_type; } isc__netievent_type;
typedef struct isc__netievent_stop { typedef struct isc__netievent_stop {
@@ -186,6 +187,7 @@ typedef isc__netievent__socket_t isc__netievent_tcpclose_t;
typedef isc__netievent__socket_t isc__netievent_startread_t; typedef isc__netievent__socket_t isc__netievent_startread_t;
typedef isc__netievent__socket_t isc__netievent_pauseread_t; typedef isc__netievent__socket_t isc__netievent_pauseread_t;
typedef isc__netievent__socket_t isc__netievent_resumeread_t; typedef isc__netievent__socket_t isc__netievent_resumeread_t;
typedef isc__netievent__socket_t isc__netievent_closecb_t;
typedef struct isc__netievent__socket_req { typedef struct isc__netievent__socket_req {
isc__netievent_type type; isc__netievent_type type;
@@ -268,6 +270,9 @@ struct isc_nmsocket {
isc_nmsocket_t *parent; isc_nmsocket_t *parent;
isc_quota_t *quota; isc_quota_t *quota;
bool overquota; bool overquota;
uv_timer_t timer;
bool timer_initialized;
uint64_t read_timeout;
/*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */ /*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */
isc_nmsocket_t *outer; isc_nmsocket_t *outer;
@@ -366,7 +371,7 @@ struct isc_nmsocket {
* might want to change it to something lockless in the * might want to change it to something lockless in the
* future. * future.
*/ */
size_t ah; atomic_int_fast32_t ah;
size_t ah_size; size_t ah_size;
size_t *ah_frees; size_t *ah_frees;
isc_nmhandle_t **ah_handles; isc_nmhandle_t **ah_handles;
@@ -398,6 +403,8 @@ isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type);
/*%< /*%<
* Allocate an ievent and set the type. * Allocate an ievent and set the type.
*/ */
void
isc__nm_put_ievent(isc_nm_t *mgr, void *ievent);
void void
isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event); isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
@@ -471,6 +478,12 @@ isc__nmsocket_prep_destroy(isc_nmsocket_t *sock);
* if there are no remaining references or active handles. * if there are no remaining references or active handles.
*/ */
void
isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ievent0);
/*%<
* Issue a 'handle closed' callback on the socket.
*/
isc_result_t isc_result_t
isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region,
isc_nm_cb_t cb, void *cbarg); isc_nm_cb_t cb, void *cbarg);

View File

@@ -450,12 +450,15 @@ async_cb(uv_async_t *handle) {
case netievent_tcpclose: case netievent_tcpclose:
isc__nm_async_tcpclose(worker, ievent); isc__nm_async_tcpclose(worker, ievent);
break; break;
case netievent_closecb:
isc__nm_async_closecb(worker, ievent);
break;
default: default:
INSIST(0); INSIST(0);
ISC_UNREACHABLE(); ISC_UNREACHABLE();
} }
isc_mem_put(worker->mgr->mctx, ievent,
sizeof(isc__netievent_storage_t)); isc__nm_put_ievent(worker->mgr, ievent);
} }
} }
@@ -471,6 +474,11 @@ isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type) {
return (event); return (event);
} }
void
isc__nm_put_ievent(isc_nm_t *mgr, void *ievent) {
isc_mem_put(mgr->mctx, ievent, sizeof(isc__netievent_storage_t));
}
void void
isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) { isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) {
isc_queue_enqueue(worker->ievents, (uintptr_t)event); isc_queue_enqueue(worker->ievents, (uintptr_t)event);
@@ -552,6 +560,11 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) {
isc_quota_detach(&sock->quota); isc_quota_detach(&sock->quota);
} }
if (sock->timer_initialized) {
uv_close((uv_handle_t *)&sock->timer, NULL);
sock->timer_initialized = false;
}
isc_astack_destroy(sock->inactivehandles); isc_astack_destroy(sock->inactivehandles);
while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) { while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) {
@@ -570,7 +583,6 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) {
} else { } else {
isc_nm_detach(&sock->mgr); isc_nm_detach(&sock->mgr);
} }
} }
static void static void
@@ -596,11 +608,11 @@ nmsocket_maybe_destroy(isc_nmsocket_t *sock) {
* accept destruction. * accept destruction.
*/ */
LOCK(&sock->lock); LOCK(&sock->lock);
active_handles += sock->ah; active_handles += atomic_load(&sock->ah);
if (sock->children != NULL) { if (sock->children != NULL) {
for (int i = 0; i < sock->nchildren; i++) { for (int i = 0; i < sock->nchildren; i++) {
LOCK(&sock->children[i].lock); LOCK(&sock->children[i].lock);
active_handles += sock->children[i].ah; active_handles += atomic_load(&sock->children[i].ah);
UNLOCK(&sock->children[i].lock); UNLOCK(&sock->children[i].lock);
} }
} }
@@ -780,6 +792,7 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
isc_sockaddr_t *local) isc_sockaddr_t *local)
{ {
isc_nmhandle_t *handle = NULL; isc_nmhandle_t *handle = NULL;
size_t handlenum;
int pos; int pos;
REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_NMSOCK(sock));
@@ -812,7 +825,7 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
LOCK(&sock->lock); LOCK(&sock->lock);
/* We need to add this handle to the list of active handles */ /* We need to add this handle to the list of active handles */
if (sock->ah == sock->ah_size) { if ((size_t) atomic_load(&sock->ah) == sock->ah_size) {
sock->ah_frees = sock->ah_frees =
isc_mem_reallocate(sock->mgr->mctx, sock->ah_frees, isc_mem_reallocate(sock->mgr->mctx, sock->ah_frees,
sock->ah_size * 2 * sock->ah_size * 2 *
@@ -831,7 +844,9 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
sock->ah_size *= 2; sock->ah_size *= 2;
} }
pos = sock->ah_frees[sock->ah++]; handlenum = atomic_fetch_add(&sock->ah, 1);
pos = sock->ah_frees[handlenum];
INSIST(sock->ah_handles[pos] == NULL); INSIST(sock->ah_handles[pos] == NULL);
sock->ah_handles[pos] = handle; sock->ah_handles[pos] = handle;
handle->ah_pos = pos; handle->ah_pos = pos;
@@ -875,62 +890,85 @@ nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
*handle = (isc_nmhandle_t) { *handle = (isc_nmhandle_t) {
.magic = 0 .magic = 0
}; };
isc_mem_put(sock->mgr->mctx, handle, sizeof(isc_nmhandle_t) + extra); isc_mem_put(sock->mgr->mctx, handle, sizeof(isc_nmhandle_t) + extra);
} }
void void
isc_nmhandle_unref(isc_nmhandle_t *handle) { isc_nmhandle_unref(isc_nmhandle_t *handle) {
isc_nmsocket_t *sock = NULL;
size_t handlenum;
bool reuse = false;
int refs; int refs;
REQUIRE(VALID_NMHANDLE(handle)); REQUIRE(VALID_NMHANDLE(handle));
refs = isc_refcount_decrement(&handle->references); refs = isc_refcount_decrement(&handle->references);
INSIST(refs > 0); INSIST(refs > 0);
if (refs == 1) { if (refs > 1) {
isc_nmsocket_t *sock = handle->sock; return;
bool reuse = false; }
handle->sock = NULL; sock = handle->sock;
if (handle->doreset != NULL) { handle->sock = NULL;
handle->doreset(handle->opaque);
}
/* if (handle->doreset != NULL) {
* We do it all under lock to avoid races with socket handle->doreset(handle->opaque);
* destruction. }
*/
LOCK(&sock->lock);
INSIST(sock->ah_handles[handle->ah_pos] == handle);
INSIST(sock->ah_size > handle->ah_pos);
INSIST(sock->ah > 0);
sock->ah_handles[handle->ah_pos] = NULL;
sock->ah_frees[--sock->ah] = handle->ah_pos;
handle->ah_pos = 0;
if (atomic_load(&sock->active)) { /*
reuse = isc_astack_trypush(sock->inactivehandles, * We do all of this under lock to avoid races with socket
handle); * destruction.
} */
UNLOCK(&sock->lock); LOCK(&sock->lock);
/* INSIST(sock->ah_handles[handle->ah_pos] == handle);
* Handle is closed. If the socket has a callback INSIST(sock->ah_size > handle->ah_pos);
* configured for that (e.g., to perform cleanup after INSIST(atomic_load(&sock->ah) > 0);
* request processing), call it now.
*/ sock->ah_handles[handle->ah_pos] = NULL;
if (sock->closehandle_cb != NULL) { handlenum = atomic_fetch_sub(&sock->ah, 1) - 1;
sock->ah_frees[handlenum] = handle->ah_pos;
handle->ah_pos = 0;
if (atomic_load(&sock->active)) {
reuse = isc_astack_trypush(sock->inactivehandles,
handle);
}
UNLOCK(&sock->lock);
if (!reuse) {
nmhandle_free(sock, handle);
}
/*
* The handle is closed. If the socket has a callback configured
* for that (e.g., to perform cleanup after request processing),
* call it now.
*/
if (sock->closehandle_cb != NULL) {
if (sock->tid == isc_nm_tid()) {
sock->closehandle_cb(sock); sock->closehandle_cb(sock);
}
if (!reuse) { /*
nmhandle_free(sock, handle); * If we do this asynchronously then
} * the async event will clean it up.
*/
if (sock->ah == 0 &&
!atomic_load(&sock->active) &&
!atomic_load(&sock->destroying))
{
nmsocket_maybe_destroy(sock);
}
} else {
if (sock->ah == 0 && isc__netievent_closecb_t * event =
!atomic_load(&sock->active) && isc__nm_get_ievent(sock->mgr,
!atomic_load(&sock->destroying)) netievent_closecb);
{ isc_nmsocket_attach(sock, &event->sock);
nmsocket_maybe_destroy(sock); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *) event);
} }
} }
} }
@@ -1055,6 +1093,21 @@ isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region,
} }
} }
void
isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ievent0) {
isc__netievent_closecb_t *ievent =
(isc__netievent_closecb_t *) ievent0;
REQUIRE(VALID_NMSOCK(ievent->sock));
REQUIRE(ievent->sock->tid == isc_nm_tid());
REQUIRE(ievent->sock->closehandle_cb != NULL);
UNUSED(worker);
ievent->sock->closehandle_cb(ievent->sock);
isc_nmsocket_detach(&ievent->sock);
}
bool bool
isc__nm_acquire_interlocked(isc_nm_t *mgr) { isc__nm_acquire_interlocked(isc_nm_t *mgr) {
LOCK(&mgr->lock); LOCK(&mgr->lock);

View File

@@ -242,25 +242,52 @@ isc__nm_async_tcpstoplisten(isc__networker_t *worker,
uv_close(&sock->uv_handle.handle, stoplistening_cb); uv_close(&sock->uv_handle.handle, stoplistening_cb);
} }
static void
readtimeout_cb(uv_timer_t *handle) {
isc_nmsocket_t *sock = (isc_nmsocket_t *) handle->data;
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->tid == isc_nm_tid());
/*
* Socket is actively processing something, so restart the timer
* and return.
*/
if (atomic_load(&sock->processing)) {
uv_timer_start(handle, readtimeout_cb, sock->read_timeout, 0);
return;
}
/*
* Timeout; stop reading and process whatever we have.
*/
uv_read_stop(&sock->uv_handle.stream);
if (sock->quota) {
isc_quota_detach(&sock->quota);
}
sock->rcb.recv(sock->tcphandle, NULL, sock->rcbarg);
}
isc_result_t isc_result_t
isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
isc_nmsocket_t *sock = NULL; isc_nmsocket_t *sock = NULL;
isc__netievent_startread_t *ievent = NULL;
REQUIRE(VALID_NMHANDLE(handle)); REQUIRE(VALID_NMHANDLE(handle));
REQUIRE(VALID_NMSOCK(handle->sock)); REQUIRE(VALID_NMSOCK(handle->sock));
sock = handle->sock; sock = handle->sock;
sock->rcb.recv = cb; sock->rcb.recv = cb;
sock->rcbarg = cbarg; /* That's obviously broken... */ sock->rcbarg = cbarg;
ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpstartread);
ievent->sock = sock;
if (sock->tid == isc_nm_tid()) { if (sock->tid == isc_nm_tid()) {
int r = uv_read_start(&sock->uv_handle.stream, isc__nm_async_startread(&sock->mgr->workers[sock->tid],
isc__nm_alloc_cb, read_cb); (isc__netievent_t *) ievent);
INSIST(r == 0); isc__nm_put_ievent(sock->mgr, ievent);
} else { } else {
isc__netievent_startread_t *ievent =
isc__nm_get_ievent(sock->mgr,
netievent_tcpstartread);
ievent->sock = sock;
isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *) ievent); (isc__netievent_t *) ievent);
} }
@@ -275,12 +302,23 @@ isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ievent0) {
isc_nmsocket_t *sock = ievent->sock; isc_nmsocket_t *sock = ievent->sock;
REQUIRE(worker->id == isc_nm_tid()); REQUIRE(worker->id == isc_nm_tid());
if (sock->read_timeout != 0) {
if (!sock->timer_initialized) {
uv_timer_init(&worker->loop, &sock->timer);
sock->timer.data = sock;
sock->timer_initialized = true;
}
uv_timer_start(&sock->timer, readtimeout_cb,
sock->read_timeout, 0);
}
uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb, read_cb); uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb, read_cb);
} }
isc_result_t isc_result_t
isc_nm_pauseread(isc_nmsocket_t *sock) { isc_nm_pauseread(isc_nmsocket_t *sock) {
isc__netievent_pauseread_t *ievent = NULL;
REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_NMSOCK(sock));
if (atomic_load(&sock->readpaused)) { if (atomic_load(&sock->readpaused)) {
@@ -288,15 +326,14 @@ isc_nm_pauseread(isc_nmsocket_t *sock) {
} }
atomic_store(&sock->readpaused, true); atomic_store(&sock->readpaused, true);
ievent = isc__nm_get_ievent(sock->mgr, netievent_tcppauseread);
ievent->sock = sock;
if (sock->tid == isc_nm_tid()) { if (sock->tid == isc_nm_tid()) {
int r = uv_read_stop(&sock->uv_handle.stream); isc__nm_async_pauseread(&sock->mgr->workers[sock->tid],
INSIST(r == 0); (isc__netievent_t *) ievent);
isc__nm_put_ievent(sock->mgr, ievent);
} else { } else {
isc__netievent_pauseread_t *ievent =
isc__nm_get_ievent(sock->mgr,
netievent_tcppauseread);
ievent->sock = sock;
isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *) ievent); (isc__netievent_t *) ievent);
} }
@@ -309,15 +346,20 @@ isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ievent0) {
isc__netievent_pauseread_t *ievent = isc__netievent_pauseread_t *ievent =
(isc__netievent_pauseread_t *) ievent0; (isc__netievent_pauseread_t *) ievent0;
isc_nmsocket_t *sock = ievent->sock; isc_nmsocket_t *sock = ievent->sock;
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(worker->id == isc_nm_tid()); REQUIRE(worker->id == isc_nm_tid());
if (sock->timer_initialized) {
uv_timer_stop(&sock->timer);
}
uv_read_stop(&sock->uv_handle.stream); uv_read_stop(&sock->uv_handle.stream);
} }
isc_result_t isc_result_t
isc_nm_resumeread(isc_nmsocket_t *sock) { isc_nm_resumeread(isc_nmsocket_t *sock) {
isc__netievent_startread_t *ievent = NULL;
REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->rcb.recv != NULL); REQUIRE(sock->rcb.recv != NULL);
@@ -327,16 +369,14 @@ isc_nm_resumeread(isc_nmsocket_t *sock) {
atomic_store(&sock->readpaused, false); atomic_store(&sock->readpaused, false);
ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpstartread);
ievent->sock = sock;
if (sock->tid == isc_nm_tid()) { if (sock->tid == isc_nm_tid()) {
int r = uv_read_start(&sock->uv_handle.stream, isc__nm_async_startread(&sock->mgr->workers[sock->tid],
isc__nm_alloc_cb, read_cb); (isc__netievent_t *) ievent);
INSIST(r == 0); isc__nm_put_ievent(sock->mgr, ievent);
} else { } else {
/* It's the same as startread */
isc__netievent_startread_t *ievent =
isc__nm_get_ievent(sock->mgr,
netievent_tcpstartread);
ievent->sock = sock;
isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *) ievent); (isc__netievent_t *) ievent);
} }
@@ -359,6 +399,11 @@ read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
INSIST(sock->rcb.recv != NULL); INSIST(sock->rcb.recv != NULL);
sock->rcb.recv(sock->tcphandle, &region, sock->rcbarg); sock->rcb.recv(sock->tcphandle, &region, sock->rcbarg);
if (sock->timer_initialized && sock->read_timeout != 0) {
/* The timer will be updated */
uv_timer_start(&sock->timer, readtimeout_cb,
sock->read_timeout, 0);
}
isc__nm_free_uvbuf(sock, buf); isc__nm_free_uvbuf(sock, buf);
return; return;
} }
@@ -440,6 +485,7 @@ accept_connection(isc_nmsocket_t *ssock) {
handle = isc__nmhandle_get(csock, NULL, &local); handle = isc__nmhandle_get(csock, NULL, &local);
INSIST(ssock->rcb.accept != NULL); INSIST(ssock->rcb.accept != NULL);
csock->read_timeout = 1000;
ssock->rcb.accept(handle, ISC_R_SUCCESS, ssock->rcbarg); ssock->rcb.accept(handle, ISC_R_SUCCESS, ssock->rcbarg);
isc_nmsocket_detach(&csock); isc_nmsocket_detach(&csock);
@@ -568,6 +614,16 @@ tcp_close_cb(uv_handle_t *uvhandle) {
isc__nmsocket_prep_destroy(sock); isc__nmsocket_prep_destroy(sock);
} }
static void
timer_close_cb(uv_handle_t *uvhandle) {
isc_nmsocket_t *sock = uvhandle->data;
REQUIRE(VALID_NMSOCK(sock));
isc_nmsocket_detach(&sock->server);
uv_close(&sock->uv_handle.handle, tcp_close_cb);
}
static void static void
tcp_close_direct(isc_nmsocket_t *sock) { tcp_close_direct(isc_nmsocket_t *sock) {
REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_NMSOCK(sock));
@@ -587,9 +643,13 @@ tcp_close_direct(isc_nmsocket_t *sock) {
} }
} }
} }
if (sock->timer_initialized) {
isc_nmsocket_detach(&sock->server); uv_close((uv_handle_t *)&sock->timer, timer_close_cb);
uv_close(&sock->uv_handle.handle, tcp_close_cb); sock->timer_initialized = false;
} else {
isc_nmsocket_detach(&sock->server);
uv_close(&sock->uv_handle.handle, tcp_close_cb);
}
} }
void void

View File

@@ -47,8 +47,16 @@ dnslen(unsigned char* base) {
return ((base[0] << 8) + (base[1])); return ((base[0] << 8) + (base[1]));
} }
/*
* Regular TCP buffer, should suffice in most cases.
*/
#define NM_REG_BUF 4096 #define NM_REG_BUF 4096
#define NM_BIG_BUF 65536 /*
* Two full DNS packets with lengths.
* netmgr receives 64k at most so there's no risk
* of overrun.
*/
#define NM_BIG_BUF (65535+2)*2
static inline void static inline void
alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) { alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) {
REQUIRE(len <= NM_BIG_BUF); REQUIRE(len <= NM_BIG_BUF);
@@ -66,6 +74,23 @@ alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) {
} }
} }
static void
timer_close_cb(uv_handle_t *handle) {
isc_nmsocket_t *sock = (isc_nmsocket_t *) handle->data;
INSIST(VALID_NMSOCK(sock));
sock->timer_initialized = false;
atomic_store(&sock->closed, true);
isc_nmsocket_detach(&sock);
}
static void
dnstcp_readtimeout(uv_timer_t *timer) {
isc_nmsocket_t *sock = (isc_nmsocket_t *) timer->data;
REQUIRE(VALID_NMSOCK(sock));
isc_nmsocket_detach(&sock->outer);
uv_close((uv_handle_t*) &sock->timer, timer_close_cb);
}
/* /*
* Accept callback for TCP-DNS connection * Accept callback for TCP-DNS connection
*/ */
@@ -94,77 +119,71 @@ dnslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
isc_nmsocket_attach(handle->sock, &dnssock->outer); isc_nmsocket_attach(handle->sock, &dnssock->outer);
dnssock->peer = handle->sock->peer; dnssock->peer = handle->sock->peer;
dnssock->iface = handle->sock->iface; dnssock->iface = handle->sock->iface;
dnssock->read_timeout = 5000;
dnssock->tid = isc_nm_tid();
dnssock->closehandle_cb = resume_processing; dnssock->closehandle_cb = resume_processing;
uv_timer_init(&dnssock->mgr->workers[isc_nm_tid()].loop,
&dnssock->timer);
dnssock->timer.data = dnssock;
dnssock->timer_initialized = true;
uv_timer_start(&dnssock->timer, dnstcp_readtimeout,
dnssock->read_timeout, 0);
isc_nm_read(handle, dnslisten_readcb, dnssock); isc_nm_read(handle, dnslisten_readcb, dnssock);
} }
static bool /*
connection_limit(isc_nmsocket_t *sock) { * Process a single packet from the incoming buffer.
int ah; *
* Return ISC_R_SUCCESS and attach 'handlep' to a handle if something
* was processed; return ISC_R_NOMORE if there isn't a full message
* to be processed.
*
* The caller will need to unreference the handle.
*/
static isc_result_t
processbuffer(isc_nmsocket_t *dnssock, isc_nmhandle_t **handlep) {
size_t len;
REQUIRE(sock->type == isc_nm_tcpdnssocket && sock->outer != NULL);
if (atomic_load(&sock->sequential)) {
/*
* We're already non-pipelining, so there's
* no need to check per-connection limits.
*/
return (false);
}
LOCK(&sock->lock);
ah = sock->ah;
UNLOCK(&sock->lock);
if (ah >= TCPDNS_CLIENTS_PER_CONN) {
atomic_store(&sock->overlimit, true);
isc_nm_pauseread(sock->outer);
return (true);
}
return (false);
}
/* Process all complete packets out of incoming buffer */
static void
processbuffer(isc_nmsocket_t *dnssock) {
REQUIRE(VALID_NMSOCK(dnssock)); REQUIRE(VALID_NMSOCK(dnssock));
REQUIRE(handlep != NULL && *handlep == NULL);
/* While we have a complete packet in the buffer */ /*
while (dnssock->buf_len > 2 && * If we don't even have the length yet, we can't do
dnslen(dnssock->buf) <= dnssock->buf_len - 2 && * anything.
!connection_limit(dnssock)) */
{ if (dnssock->buf_len < 2) {
return (ISC_R_NOMORE);
}
/*
* Process the first packet from the buffer, leaving
* the rest (if any) for later.
*/
len = dnslen(dnssock->buf);
if (len <= dnssock->buf_len - 2) {
isc_nmhandle_t *dnshandle = NULL; isc_nmhandle_t *dnshandle = NULL;
isc_region_t r2 = { isc_region_t r2 = {
.base = dnssock->buf + 2, .base = dnssock->buf + 2,
.length = dnslen(dnssock->buf) .length = len
}; };
size_t len;
dnshandle = isc__nmhandle_get(dnssock, NULL, NULL); dnshandle = isc__nmhandle_get(dnssock, NULL, NULL);
atomic_store(&dnssock->processing, true);
dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg); dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg);
/* len += 2;
* If the recv callback wants to hold on to the
* handle, it needs to attach to it.
*/
isc_nmhandle_unref(dnshandle);
len = dnslen(dnssock->buf) + 2;
dnssock->buf_len -= len; dnssock->buf_len -= len;
if (len > 0) { if (len > 0) {
memmove(dnssock->buf, dnssock->buf + len, memmove(dnssock->buf, dnssock->buf + len,
dnssock->buf_len); dnssock->buf_len);
} }
/* Check here to make sure we do the processing at least once */ *handlep = dnshandle;
if (atomic_load(&dnssock->processing)) { return (ISC_R_SUCCESS);
return;
}
} }
return (ISC_R_NOMORE);
} }
/* /*
@@ -174,8 +193,8 @@ processbuffer(isc_nmsocket_t *dnssock) {
static void static void
dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg) { dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg) {
isc_nmsocket_t *dnssock = (isc_nmsocket_t *) arg; isc_nmsocket_t *dnssock = (isc_nmsocket_t *) arg;
isc_sockaddr_t local;
unsigned char *base = NULL; unsigned char *base = NULL;
bool done = false;
size_t len; size_t len;
REQUIRE(VALID_NMSOCK(dnssock)); REQUIRE(VALID_NMSOCK(dnssock));
@@ -183,133 +202,63 @@ dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg) {
if (region == NULL) { if (region == NULL) {
/* Connection closed */ /* Connection closed */
atomic_store(&dnssock->closed, true); isc__nm_tcpdns_close(dnssock);
isc_nmsocket_detach(&dnssock->outer);
isc_nmsocket_detach(&dnssock);
return; return;
} }
local = isc_nmhandle_localaddr(handle);
base = region->base; base = region->base;
len = region->length; len = region->length;
/* if (dnssock->buf_len + len > dnssock->buf_size) {
* We have something in the buffer, we need to glue it. alloc_dnsbuf(dnssock, dnssock->buf_len + len);
*/
if (dnssock->buf_len > 0) {
if (dnssock->buf_len == 1) {
/* Make sure we have the length */
dnssock->buf[1] = base[0];
dnssock->buf_len = 2;
base++;
len--;
}
processbuffer(dnssock);
} }
memmove(dnssock->buf + dnssock->buf_len, base, len);
dnssock->buf_len += len;
if (dnssock->buf_len > 0) { do {
size_t plen; isc_result_t result;
isc_nmhandle_t *dnshandle = NULL;
if (dnssock->buf_len == 1) { result = processbuffer(dnssock, &dnshandle);
/* Make sure we have the length */ if (result != ISC_R_SUCCESS) {
dnssock->buf[1] = base[0];
dnssock->buf_len = 2;
base++;
len--;
}
/* At this point we definitely have 2 bytes there. */
plen = ISC_MIN(len, (dnslen(dnssock->buf) + 2 -
dnssock->buf_len));
if (dnssock->buf_len + plen > NM_BIG_BUF) {
/* /*
* XXX: continuing to read will overrun the * There wasn't anything in the buffer to process.
* socket buffer. We may need to force the
* connection to close so the client will have
* to open a new one.
*/ */
return; return;
} }
if (dnssock->buf_len + plen > dnssock->buf_size) {
alloc_dnsbuf(dnssock, dnssock->buf_len + plen);
}
memmove(dnssock->buf + dnssock->buf_len, base, plen);
dnssock->buf_len += plen;
base += plen;
len -= plen;
/* Do we have a complete packet in the buffer? */
if (dnslen(dnssock->buf) >= dnssock->buf_len - 2 &&
!connection_limit(dnssock))
{
isc_nmhandle_t *dnshandle = NULL;
isc_region_t r2 = {
.base = dnssock->buf + 2,
.length = dnslen(dnssock->buf)
};
dnshandle = isc__nmhandle_get(dnssock, NULL, &local);
atomic_store(&dnssock->processing, true);
dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg);
dnssock->buf_len = 0;
/*
* If the recv callback wants to hold on to the
* handle, it needs to attach to it.
*/
isc_nmhandle_unref(dnshandle);
}
}
/*
* At this point we've processed whatever was previously in the
* socket buffer. If there are more messages to be found in what
* we've read, and if we're either pipelining or not processing
* anything else currently, then we can process those messages now.
*/
while (len >= 2 && dnslen(base) <= len - 2 &&
(!atomic_load(&dnssock->sequential) ||
!atomic_load(&dnssock->processing)) &&
!connection_limit(dnssock))
{
isc_nmhandle_t *dnshandle = NULL;
isc_region_t r2 = {
.base = base + 2,
.length = dnslen(base)
};
len -= dnslen(base) + 2;
base += dnslen(base) + 2;
dnshandle = isc__nmhandle_get(dnssock, NULL, &local);
atomic_store(&dnssock->processing, true);
dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg);
/* /*
* If the recv callback wants to hold on to the * We have a packet: stop timeout timers
* handle, it needs to attach to it.
*/ */
isc_nmhandle_unref(dnshandle); atomic_store(&dnssock->outer->processing, true);
} uv_timer_stop(&dnssock->timer);
/* if (dnssock->sequential) {
* We have less than a full message remaining; it can be /*
* stored in the socket buffer for next time. * We're in sequential mode and we processed
*/ * one packet, so we're done until the next read
if (len > 0) { * completes.
if (len > dnssock->buf_size) { */
alloc_dnsbuf(dnssock, len); isc_nm_pauseread(dnssock->outer);
done = true;
} else {
/*
* We're pipelining, so we now resume processing
* packets until the clients-per-connection limit
* is reached (as determined by the number of
* active handles on the socket). When the limit
* is reached, pause reading.
*/
if (atomic_load(&dnssock->ah) >=
TCPDNS_CLIENTS_PER_CONN)
{
isc_nm_pauseread(dnssock->outer);
done = true;
}
} }
INSIST(len <= dnssock->buf_size); isc_nmhandle_unref(dnshandle);
memmove(dnssock->buf, base, len); } while (!done);
dnssock->buf_len = len;
}
} }
/* /*
@@ -394,23 +343,64 @@ typedef struct tcpsend {
static void static void
resume_processing(void *arg) { resume_processing(void *arg) {
isc_nmsocket_t *sock = (isc_nmsocket_t *) arg; isc_nmsocket_t *sock = (isc_nmsocket_t *) arg;
isc_result_t result;
REQUIRE(VALID_NMSOCK(sock)); REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->tid == isc_nm_tid());
if (sock->type != isc_nm_tcpdnssocket || sock->outer == NULL) { if (sock->type != isc_nm_tcpdnssocket || sock->outer == NULL) {
return; return;
} }
/* if (atomic_load(&sock->ah) == 0) {
* If we're in sequential mode or over the /* Nothing is active; sockets can timeout now */
* clients-per-connection limit, the sock can atomic_store(&sock->outer->processing, false);
* resume reading now. uv_timer_start(&sock->timer, dnstcp_readtimeout,
*/ sock->read_timeout, 0);
if (atomic_load(&sock->overlimit) || atomic_load(&sock->sequential)) {
atomic_store(&sock->overlimit, false);
atomic_store(&sock->processing, false);
isc_nm_resumeread(sock->outer);
} }
/*
* For sequential sockets: Process what's in the buffer, or
* if there aren't any messages buffered, resume reading.
*/
if (sock->sequential) {
isc_nmhandle_t *handle = NULL;
result = processbuffer(sock, &handle);
if (result == ISC_R_SUCCESS) {
atomic_store(&sock->outer->processing, true);
uv_timer_stop(&sock->timer);
isc_nmhandle_unref(handle);
} else if (sock->outer != NULL) {
isc_nm_resumeread(sock->outer);
}
return;
}
/*
* For pipelined sockets: If we're under the clients-per-connection
* limit, resume processing until we reach the limit again.
*/
do {
isc_nmhandle_t *dnshandle = NULL;
result = processbuffer(sock, &dnshandle);
if (result != ISC_R_SUCCESS) {
/*
* Nothing in the buffer; resume reading.
*/
if (sock->outer != NULL) {
isc_nm_resumeread(sock->outer);
}
break;
}
uv_timer_stop(&sock->timer);
atomic_store(&sock->outer->processing, true);
isc_nmhandle_unref(dnshandle);
} while (atomic_load(&sock->ah) < TCPDNS_CLIENTS_PER_CONN);
} }
static void static void
@@ -422,19 +412,6 @@ tcpdnssend_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
ts->cb(ts->orighandle, result, ts->cbarg); ts->cb(ts->orighandle, result, ts->cbarg);
isc_mem_put(ts->mctx, ts->region.base, ts->region.length); isc_mem_put(ts->mctx, ts->region.base, ts->region.length);
/*
* The response was sent; if we're in sequential or overlimit
* mode, resume processing now.
*/
if (atomic_load(&ts->orighandle->sock->sequential) ||
atomic_load(&ts->orighandle->sock->overlimit))
{
atomic_store(&ts->orighandle->sock->processing, false);
atomic_store(&ts->orighandle->sock->overlimit, false);
processbuffer(ts->orighandle->sock);
isc_nm_resumeread(handle->sock);
}
isc_nmhandle_unref(ts->orighandle); isc_nmhandle_unref(ts->orighandle);
isc_mem_putanddetach(&ts->mctx, ts, sizeof(*ts)); isc_mem_putanddetach(&ts->mctx, ts, sizeof(*ts));
} }
@@ -483,12 +460,11 @@ isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
return (isc__nm_tcp_send(t->handle, &t->region, tcpdnssend_cb, t)); return (isc__nm_tcp_send(t->handle, &t->region, tcpdnssend_cb, t));
} }
void void
isc__nm_tcpdns_close(isc_nmsocket_t *sock) { isc__nm_tcpdns_close(isc_nmsocket_t *sock) {
if (sock->outer != NULL) { if (sock->outer != NULL) {
isc_nmsocket_detach(&sock->outer); isc_nmsocket_detach(&sock->outer);
} }
uv_close((uv_handle_t*) &sock->timer, timer_close_cb);
atomic_store(&sock->closed, true);
isc__nmsocket_prep_destroy(sock);
} }