2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-22 01:59:26 +00:00
bind/lib/dns/dispatch.c
Ondřej Surý a5f5f68502
Refactor isc_time_now() to return time, and not result
The isc_time_now() and isc_time_now_hires() were used inconsistently
through the code - either with status check, or without status check,
or via TIME_NOW() macro with RUNTIME_CHECK() on failure.

Refactor the isc_time_now() and isc_time_now_hires() to always fail when
getting current time has failed, and return the isc_time_t value as
return value instead of passing the pointer to result in the argument.
2023-03-31 15:02:06 +02:00

2346 lines
56 KiB
C

/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
/*! \file */
#include <inttypes.h>
#include <stdbool.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <isc/atomic.h>
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/net.h>
#include <isc/netmgr.h>
#include <isc/portset.h>
#include <isc/random.h>
#include <isc/stats.h>
#include <isc/string.h>
#include <isc/tid.h>
#include <isc/time.h>
#include <isc/tls.h>
#include <isc/util.h>
#include <dns/acl.h>
#include <dns/dispatch.h>
#include <dns/log.h>
#include <dns/message.h>
#include <dns/stats.h>
#include <dns/transport.h>
#include <dns/types.h>
/*%
 * A list of dispatch entries.  This type is used for the QID hash table
 * buckets and for the 'pending' and 'active' queues of a dispatch.
 */
typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
/*%
 * Query-ID table: a hash table of dispatch entries.  Buckets are chosen
 * with dns_hash() and scanned with entry_search().
 */
typedef struct dns_qid {
unsigned int magic; /*%< QID_MAGIC while the table is valid */
isc_mutex_t lock; /*%< held around bucket lookups (see tcp_recv_success()) */
unsigned int qid_nbuckets; /*%< hash table size */
unsigned int qid_increment; /*%< id increment on collision */
dns_displist_t *qid_table; /*%< the table itself */
} dns_qid_t;
/*%
 * Dispatch manager: owns the QID table, the list of dispatches and the
 * arrays of ports from which setup_socket() picks a local port.
 */
struct dns_dispatchmgr {
/* Unlocked. */
unsigned int magic;
isc_refcount_t references;
isc_mem_t *mctx;
dns_acl_t *blackhole; /*%< optional ACL; matching UDP senders are dropped */
isc_stats_t *stats; /*%< optional; NULL disables inc_stats()/dec_stats() */
isc_nm_t *nm;
/* Locked by "lock". */
isc_mutex_t lock;
ISC_LIST(dns_dispatch_t) list;
dns_qid_t *qid;
in_port_t *v4ports; /*%< available ports for IPv4 */
unsigned int nv4ports; /*%< # of available ports for IPv4 */
in_port_t *v6ports; /*%< available ports for IPv6 */
unsigned int nv6ports; /*%< # of available ports for IPv6 */
};
/*%
 * Connection state, kept both per dispatch and per dispatch entry.
 * state2str() maps these values to printable names.
 */
typedef enum {
DNS_DISPATCHSTATE_NONE = 0UL, /*%< initial value of a zero-initialized object */
DNS_DISPATCHSTATE_CONNECTING,
DNS_DISPATCHSTATE_CONNECTED,
DNS_DISPATCHSTATE_CANCELED,
} dns_dispatchstate_t;
/*%
 * A dispatch entry: one outstanding request and the callbacks used to
 * report on it.  Objects of this type are validated with
 * VALID_RESPONSE().
 */
struct dns_dispentry {
unsigned int magic;
isc_refcount_t references;
dns_dispatch_t *disp; /*%< owning dispatch; detached in dispentry_destroy() */
isc_nmhandle_t *handle; /*%< netmgr handle for UDP connection */
dns_dispatchstate_t state;
dns_transport_t *transport; /*%< optional; consulted by socktype2str() */
isc_tlsctx_cache_t *tlsctx_cache; /*%< optional */
unsigned int bucket; /* NOTE(review): presumably the QID table bucket - confirm */
unsigned int retries; /*%< counted by setup_socket(), which gives up after too many */
unsigned int timeout; /*%< compared against dispentry_runtime() (milliseconds) */
isc_time_t start; /*%< reference time for dispentry_runtime(); epoch means unset */
isc_sockaddr_t local; /*%< set by setup_socket() */
isc_sockaddr_t peer; /*%< set by setup_socket(); responses must come from here */
in_port_t port; /*%< set by setup_socket() */
dns_messageid_t id; /*%< message ID a response must carry */
dispatch_cb_t connected;
dispatch_cb_t sent;
dispatch_cb_t response; /*%< called from the read callbacks with 'arg' */
void *arg; /*%< opaque argument passed to the callbacks */
bool reading; /*%< true while a read is outstanding for this entry */
isc_result_t result; /*%< result to report; set by tcp_recv_add() */
ISC_LINK(dns_dispentry_t) link; /*%< QID table bucket linkage */
ISC_LINK(dns_dispentry_t) alink; /*%< linkage on disp->active */
ISC_LINK(dns_dispentry_t) plink; /* NOTE(review): presumably disp->pending linkage - confirm */
ISC_LINK(dns_dispentry_t) rlink; /*%< linkage on the temporary list built by tcp_recv() */
};
/*%
 * A dispatch: either a UDP dispatch bound to a local address or a TCP
 * dispatch for one peer, shared by the dispatch entries that use it.
 */
struct dns_dispatch {
/* Unlocked. */
unsigned int magic; /*%< magic */
uint32_t tid; /*%< isc_tid() of the creating thread */
dns_dispatchmgr_t *mgr; /*%< dispatch manager */
isc_nmhandle_t *handle; /*%< netmgr handle for TCP connection */
isc_sockaddr_t local; /*%< local address */
in_port_t localport; /*%< local UDP port */
isc_sockaddr_t peer; /*%< peer address (TCP) */
/*% Locked by mgr->lock. */
ISC_LINK(dns_dispatch_t) link;
/* Locked by "lock". */
isc_mutex_t lock; /*%< locks all below */
isc_socktype_t socktype;
dns_dispatchstate_t state;
isc_refcount_t references;
bool reading; /*%< true while a TCP read is outstanding */
dns_displist_t pending;
dns_displist_t active; /*%< entries waiting for a response */
unsigned int requests; /*%< how many requests we have */
unsigned int timedout; /*%< TCP entries timed out whose answers may still arrive */
};
/*
 * Magic numbers and the matching validity checks for the object types
 * in this file.  RESPONSE_MAGIC/VALID_RESPONSE() apply to
 * dns_dispentry_t objects.
 */
#define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
#define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
#define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
#define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
#define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*%
 * Number of buckets in the QID hash table, and the value to
 * increment the QID by when attempting to avoid collisions.
 * The number of buckets should be prime, and the increment
 * should be the next higher prime number.
 *
 * Both values can be overridden by defining the macros before this
 * point (e.g. on the compiler command line).
 */
#ifndef DNS_QID_BUCKETS
#define DNS_QID_BUCKETS 16411
#endif /* ifndef DNS_QID_BUCKETS */
#ifndef DNS_QID_INCREMENT
#define DNS_QID_INCREMENT 16433
#endif /* ifndef DNS_QID_INCREMENT */
/*
 * Reference counting declarations for dns_dispentry_t.  When
 * DNS_DISPATCH_TRACE is enabled, the ref/unref/attach/detach calls are
 * routed to variants that also receive the calling function, file and
 * line.
 */
#if DNS_DISPATCH_TRACE
#define dns_dispentry_ref(ptr) \
dns_dispentry__ref(ptr, __func__, __FILE__, __LINE__)
#define dns_dispentry_unref(ptr) \
dns_dispentry__unref(ptr, __func__, __FILE__, __LINE__)
#define dns_dispentry_attach(ptr, ptrp) \
dns_dispentry__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
#define dns_dispentry_detach(ptrp) \
dns_dispentry__detach(ptrp, __func__, __FILE__, __LINE__)
ISC_REFCOUNT_TRACE_DECL(dns_dispentry);
#else
ISC_REFCOUNT_DECL(dns_dispentry);
#endif
/*
 * Statics.
 *
 * Forward declarations of file-local functions that are used before
 * their definitions.
 */
static void
dispatchmgr_destroy(dns_dispatchmgr_t *mgr);
static dns_dispentry_t *
entry_search(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t,
unsigned int);
/* netmgr read callbacks */
static void
udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
void *arg);
static void
tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
void *arg);
static uint32_t
dns_hash(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t);
static void
dispentry_cancel(dns_dispentry_t *resp, isc_result_t result);
static isc_result_t
dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr,
dns_dispatch_t **dispp);
static void
qid_allocate(dns_dispatchmgr_t *mgr, dns_qid_t **qidp);
static void
qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
static void
udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp);
static void
tcp_startrecv(dns_dispatch_t *disp, dns_dispentry_t *resp);
static void
tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp,
int32_t timeout);
static void
udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout);
/*% Shorthand for the debug log level 'x'. */
#define LVL(x) ISC_LOG_DEBUG(x)
/*
 * Return a printable name for the transport used by 'resp'.
 *
 * Entries on a non-TCP dispatch are reported as "UDP".  On a TCP
 * dispatch the name follows the entry's transport object, or is plain
 * "TCP" when no transport is attached.
 */
static const char *
socktype2str(dns_dispentry_t *resp) {
	dns_transport_type_t kind;

	if (resp->disp->socktype != isc_socktype_tcp) {
		kind = DNS_TRANSPORT_UDP;
	} else if (resp->transport == NULL) {
		kind = DNS_TRANSPORT_TCP;
	} else {
		kind = dns_transport_get_type(resp->transport);
	}

	if (kind == DNS_TRANSPORT_UDP) {
		return "UDP";
	}
	if (kind == DNS_TRANSPORT_TCP) {
		return "TCP";
	}
	if (kind == DNS_TRANSPORT_TLS) {
		return "TLS";
	}
	if (kind == DNS_TRANSPORT_HTTP) {
		return "HTTP";
	}
	return "<unexpected>";
}
/*
 * Return a printable name for a dispatch state, or "<unexpected>" for
 * a value outside the enumeration.
 */
static const char *
state2str(dns_dispatchstate_t state) {
	if (state == DNS_DISPATCHSTATE_NONE) {
		return "none";
	}
	if (state == DNS_DISPATCHSTATE_CONNECTING) {
		return "connecting";
	}
	if (state == DNS_DISPATCHSTATE_CONNECTED) {
		return "connected";
	}
	if (state == DNS_DISPATCHSTATE_CANCELED) {
		return "canceled";
	}
	return "<unexpected>";
}
static void
mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
	ISC_FORMAT_PRINTF(3, 4);

/*
 * Log a printf-style message about dispatch manager 'mgr' at 'level'.
 *
 * Nothing is formatted unless the message would actually be logged.
 * The formatted text is limited to the size of the local buffer; a
 * formatting error yields an empty message rather than the contents
 * of an unwritten buffer.
 */
static void
mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
	char msgbuf[2048];
	va_list ap;
	int r;

	if (!isc_log_wouldlog(dns_lctx, level)) {
		return;
	}

	va_start(ap, fmt);
	r = vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
	va_end(ap);

	if (r < 0) {
		/* Formatting failed; the buffer contents are unspecified. */
		msgbuf[0] = '\0';
	} else if ((unsigned int)r >= sizeof(msgbuf)) {
		/* Truncated */
		msgbuf[sizeof(msgbuf) - 1] = '\0';
	}

	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
		      DNS_LOGMODULE_DISPATCH, level, "dispatchmgr %p: %s", mgr,
		      msgbuf);
}
/*
 * Increment 'counter' in the manager's statistics set; a no-op when no
 * statistics set has been attached.
 */
static void
inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
	if (mgr->stats == NULL) {
		return;
	}
	isc_stats_increment(mgr->stats, counter);
}
/*
 * Decrement 'counter' in the manager's statistics set; a no-op when no
 * statistics set has been attached.
 */
static void
dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
	if (mgr->stats == NULL) {
		return;
	}
	isc_stats_decrement(mgr->stats, counter);
}
static void
dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
	ISC_FORMAT_PRINTF(3, 4);

/*
 * Log a printf-style message about dispatch 'disp' at 'level'.
 *
 * The message is only formatted when it would be logged.  Formatting
 * errors produce an empty message; overlong messages are cut off at
 * the size of the local buffer.
 */
static void
dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
	char text[2048];
	va_list args;
	int written;

	if (!isc_log_wouldlog(dns_lctx, level)) {
		return;
	}

	va_start(args, fmt);
	written = vsnprintf(text, sizeof(text), fmt, args);
	va_end(args);

	if (written < 0) {
		text[0] = '\0';
	} else if ((unsigned int)written >= sizeof(text)) {
		/* Truncated */
		text[sizeof(text) - 1] = '\0';
	}

	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
		      DNS_LOGMODULE_DISPATCH, level, "dispatch %p: %s", disp,
		      text);
}
static void
dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...)
	ISC_FORMAT_PRINTF(3, 4);

/*
 * Log a printf-style message about dispatch entry 'resp' at 'level'.
 *
 * The text is formatted locally and then forwarded to dispatch_log()
 * for the owning dispatch, prefixed with the transport name and the
 * entry's address.
 */
static void
dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...) {
	char text[2048];
	va_list args;
	int written;

	if (!isc_log_wouldlog(dns_lctx, level)) {
		return;
	}

	va_start(args, fmt);
	written = vsnprintf(text, sizeof(text), fmt, args);
	va_end(args);

	if (written < 0) {
		text[0] = '\0';
	} else if ((unsigned int)written >= sizeof(text)) {
		/* Truncated */
		text[sizeof(text) - 1] = '\0';
	}

	dispatch_log(resp->disp, level, "%s response %p: %s",
		     socktype2str(resp), resp, text);
}
/*
 * Map a (destination address, message id, port) triple to a bucket
 * index in 'qid'.  The result is always less than qid->qid_nbuckets.
 */
static uint32_t
dns_hash(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id,
	 in_port_t port) {
	uint32_t bucket = isc_sockaddr_hash(dest, true);
	const uint32_t idport = ((uint32_t)id << 16) | port;

	bucket ^= idport;
	bucket %= qid->qid_nbuckets;

	INSIST(bucket < qid->qid_nbuckets);

	return bucket;
}
/*%
 * Fill in the local address, peer address and port of dispatch entry
 * 'resp'.  When '*portp' is zero a port is drawn at random from the
 * manager's list for the dispatch's address family and written back
 * through 'portp'; otherwise '*portp' is used unchanged.
 *
 * Every call counts as a retry for 'resp'; ISC_R_FAILURE is returned
 * once the retry budget is used up, and ISC_R_ADDRNOTAVAIL when no
 * ports are configured for the address family.
 *
 * The caller must hold the disp->lock
 */
static isc_result_t
setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp,
	     const isc_sockaddr_t *dest, in_port_t *portp) {
	dns_dispatchmgr_t *mgr = disp->mgr;
	in_port_t *pool = NULL;
	unsigned int poolsize;
	in_port_t chosen = *portp;

	if (resp->retries++ > 5) {
		return ISC_R_FAILURE;
	}

	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
		pool = mgr->v4ports;
		poolsize = mgr->nv4ports;
	} else {
		pool = mgr->v6ports;
		poolsize = mgr->nv6ports;
	}
	if (poolsize == 0) {
		return ISC_R_ADDRNOTAVAIL;
	}

	resp->local = disp->local;
	resp->peer = *dest;

	if (chosen == 0) {
		chosen = pool[isc_random_uniform(poolsize)];
		isc_sockaddr_setport(&resp->local, chosen);
		*portp = chosen;
	}
	resp->port = chosen;

	return ISC_R_SUCCESS;
}
/*
 * Scan bucket 'bucket' of 'qid' for the entry whose query ID is 'id',
 * whose peer address equals 'dest' and whose port is 'port'.
 *
 * Returns the first matching entry, or NULL if the bucket holds none.
 */
static dns_dispentry_t *
entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id,
	     in_port_t port, unsigned int bucket) {
	REQUIRE(VALID_QID(qid));
	REQUIRE(bucket < qid->qid_nbuckets);

	for (dns_dispentry_t *entry = ISC_LIST_HEAD(qid->qid_table[bucket]);
	     entry != NULL; entry = ISC_LIST_NEXT(entry, link))
	{
		if (entry->id != id) {
			continue;
		}
		if (!isc_sockaddr_equal(dest, &entry->peer)) {
			continue;
		}
		if (entry->port == port) {
			return entry;
		}
	}

	return NULL;
}
/*
 * Tear down and free dispatch entry 'resp'.  This is the destructor
 * handed to the reference-counting implementation macro for
 * dns_dispentry.
 *
 * The entry is canceled, removed from the dispatch's request count,
 * its handle, TLS context cache and transport are released, and
 * finally the reference the entry held on its dispatch is dropped.
 */
static void
dispentry_destroy(dns_dispentry_t *resp) {
dns_dispatch_t *disp = resp->disp;
/*
* We need to call this from here in case there's an external event that
* shuts down our dispatch (like ISC_R_SHUTTINGDOWN).
*/
dispentry_cancel(resp, ISC_R_CANCELED);
/* The entry no longer counts as a request on its dispatch. */
LOCK(&disp->lock);
INSIST(disp->requests > 0);
disp->requests--;
UNLOCK(&disp->lock);
isc_refcount_destroy(&resp->references);
resp->magic = 0;
/* By now the entry must have been removed from every list. */
INSIST(!ISC_LINK_LINKED(resp, link));
INSIST(!ISC_LINK_LINKED(resp, plink));
INSIST(!ISC_LINK_LINKED(resp, alink));
INSIST(!ISC_LINK_LINKED(resp, rlink));
dispentry_log(resp, LVL(90), "destroying");
if (resp->handle != NULL) {
dispentry_log(resp, LVL(90), "detaching handle %p from %p",
resp->handle, &resp->handle);
isc_nmhandle_detach(&resp->handle);
}
if (resp->tlsctx_cache != NULL) {
isc_tlsctx_cache_detach(&resp->tlsctx_cache);
}
if (resp->transport != NULL) {
dns_transport_detach(&resp->transport);
}
/* The memory context is reached through the dispatch, so free first. */
isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp));
dns_dispatch_detach(&disp); /* DISPATCH001 */
}
/*
 * Generate the reference-counting functions for dns_dispentry, with
 * dispentry_destroy() as the destructor.  The tracing variant is used
 * when DNS_DISPATCH_TRACE is enabled.
 */
#if DNS_DISPATCH_TRACE
ISC_REFCOUNT_TRACE_IMPL(dns_dispentry, dispentry_destroy);
#else
ISC_REFCOUNT_IMPL(dns_dispentry, dispentry_destroy);
#endif
/*
 * Return the number of milliseconds elapsed since 'resp' started
 * reading, or 0 when no start time has been recorded (the start time
 * is still the epoch).  Only used for UDP, to shrink the timeout when
 * running getnext.
 */
static unsigned int
dispentry_runtime(dns_dispentry_t *resp) {
	if (!isc_time_isepoch(&resp->start)) {
		isc_time_t now = isc_time_now();

		return isc_time_microdiff(&now, &resp->start) / 1000;
	}

	return 0;
}
/*
 * Read callback for a UDP dispatch entry; 'arg' is the dns_dispentry_t
 * the read was started for.
 *
 * General flow:
 *
 * If the entry has been canceled, nobody is called back.
 *
 * If the I/O result is an error, it is passed to the entry's response
 * callback as is.
 *
 * Otherwise the packet is inspected.  A packet from a blackholed
 * address, a packet whose header can't be parsed, a query, or a
 * response whose ID or source address doesn't match is dropped; if
 * the entry's timeout has not run out yet, reading continues with the
 * remaining time, else ISC_R_TIMEDOUT is reported.
 *
 * A matching response is handed to the response callback.
 *
 * In every case the entry reference held for the read (DISPENTRY003)
 * is released before returning.
 */
static void
udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
	 void *arg) {
	dns_dispentry_t *resp = (dns_dispentry_t *)arg;
	dns_dispatch_t *disp = NULL;
	dns_messageid_t id;
	isc_result_t dres;
	isc_buffer_t source;
	unsigned int flags;
	isc_sockaddr_t peer;
	isc_netaddr_t netaddr;
	int match, timeout = 0;
	bool respond = true;

	REQUIRE(VALID_RESPONSE(resp));
	REQUIRE(VALID_DISPATCH(resp->disp));

	disp = resp->disp;

	LOCK(&disp->lock);
	INSIST(resp->reading);
	resp->reading = false;

	if (resp->state == DNS_DISPATCHSTATE_CANCELED) {
		/*
		 * Nobody is interested in the callback if the response
		 * has been canceled already.  Detach from the response
		 * and the handle.
		 */
		respond = false;
		eresult = ISC_R_CANCELED;
	}

	/* disp->requests is unsigned, hence %u. */
	dispentry_log(resp, LVL(90), "read callback:%s, requests %u",
		      isc_result_totext(eresult), disp->requests);

	if (eresult != ISC_R_SUCCESS) {
		/*
		 * This is most likely a network error on a connected
		 * socket, a timeout, or the query has been canceled.
		 * It makes no sense to check the address or parse the
		 * packet, but we can return the error to the caller.
		 */
		goto done;
	}

	peer = isc_nmhandle_peeraddr(handle);
	isc_netaddr_fromsockaddr(&netaddr, &peer);

	/*
	 * If this is from a blackholed address, drop it.
	 */
	if (disp->mgr->blackhole != NULL &&
	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole, NULL, &match,
			  NULL) == ISC_R_SUCCESS &&
	    match > 0)
	{
		if (isc_log_wouldlog(dns_lctx, LVL(10))) {
			char netaddrstr[ISC_NETADDR_FORMATSIZE];
			isc_netaddr_format(&netaddr, netaddrstr,
					   sizeof(netaddrstr));
			dispentry_log(resp, LVL(10),
				      "blackholed packet from %s", netaddrstr);
		}
		goto next;
	}

	/*
	 * Peek into the buffer to see what we can see.
	 */
	id = resp->id;
	isc_buffer_init(&source, region->base, region->length);
	isc_buffer_add(&source, region->length);
	dres = dns_message_peekheader(&source, &id, &flags);
	if (dres != ISC_R_SUCCESS) {
		char netaddrstr[ISC_NETADDR_FORMATSIZE];
		isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr));
		dispentry_log(resp, LVL(10), "got garbage packet from %s",
			      netaddrstr);
		goto next;
	}

	dispentry_log(resp, LVL(92),
		      "got valid DNS message header, /QR %c, id %u",
		      (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);

	/*
	 * Look at the message flags.  If it's a query, ignore it.
	 */
	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
		goto next;
	}

	/*
	 * The QID and the address must match the expected ones.
	 */
	if (resp->id != id || !isc_sockaddr_equal(&peer, &resp->peer)) {
		dispentry_log(resp, LVL(90), "response doesn't match");
		inc_stats(disp->mgr, dns_resstatscounter_mismatch);
		goto next;
	}

	/*
	 * We have the right resp, so call the caller back.
	 */
	goto done;

next:
	/*
	 * This is the wrong response.  Check whether there is still enough
	 * time to wait for the correct one to arrive before the timeout fires.
	 */
	timeout = resp->timeout - dispentry_runtime(resp);
	if (timeout <= 0) {
		/*
		 * The time window for receiving the correct response is
		 * already closed, libuv has just not processed the socket
		 * timer yet.  Invoke the read callback, indicating a timeout.
		 */
		eresult = ISC_R_TIMEDOUT;
		goto done;
	}

	/*
	 * Do not invoke the read callback just yet and instead wait for the
	 * proper response to arrive until the original timeout fires.
	 */
	respond = false;
	udp_dispatch_getnext(resp, timeout);

done:
	UNLOCK(&disp->lock);

	/* The callback runs without the dispatch lock held. */
	if (respond) {
		dispentry_log(resp, LVL(90), "UDP read callback on %p: %s",
			      handle, isc_result_totext(eresult));
		resp->response(eresult, region, resp->arg);
	}

	dns_dispentry_detach(&resp); /* DISPENTRY003 */
}
/*
 * Pick the entry at the head of the dispatch's active list as the one
 * to time out.
 *
 * When the list is non-empty, the dispatch's 'timedout' counter is
 * incremented, '*respp' is set to the head entry and ISC_R_TIMEDOUT is
 * returned.  Otherwise '*respp' is left untouched and ISC_R_NOTFOUND
 * is returned.
 */
static isc_result_t
tcp_recv_oldest(dns_dispatch_t *disp, dns_dispentry_t **respp) {
	dns_dispentry_t *oldest = ISC_LIST_HEAD(disp->active);

	if (oldest == NULL) {
		return ISC_R_NOTFOUND;
	}

	disp->timedout++;
	*respp = oldest;
	return ISC_R_TIMEDOUT;
}
/*
 * Handle a successfully read TCP message on 'disp'.
 *
 * The DNS header in 'region' is inspected and the QID table is
 * searched for the dispatch entry matching the message ID, 'peer' and
 * the dispatch's local port.
 *
 * Returns:
 *	ISC_R_SUCCESS	 a matching entry that is still reading was
 *			 found and stored in '*respp'
 *	ISC_R_UNEXPECTED the header could not be parsed, the message is
 *			 a query, or the matching entry is not reading
 *			 any more
 *	ISC_R_NOTFOUND	 no entry matches the message
 *
 * '*respp' is only written on success.
 */
static isc_result_t
tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid,
		 isc_sockaddr_t *peer, dns_dispentry_t **respp) {
	isc_buffer_t source;
	dns_messageid_t id;
	unsigned int flags;
	unsigned int bucket;
	isc_result_t result = ISC_R_SUCCESS;
	dns_dispentry_t *resp = NULL;

	/* The length is printed as unsigned; the cast pins the type. */
	dispatch_log(disp, LVL(90), "TCP read success, length == %u, addr = %p",
		     (unsigned int)region->length, region->base);

	/*
	 * Peek into the buffer to see what we can see.
	 */
	isc_buffer_init(&source, region->base, region->length);
	isc_buffer_add(&source, region->length);
	result = dns_message_peekheader(&source, &id, &flags);
	if (result != ISC_R_SUCCESS) {
		dispatch_log(disp, LVL(10), "got garbage packet");
		return (ISC_R_UNEXPECTED);
	}

	dispatch_log(disp, LVL(92),
		     "got valid DNS message header, /QR %c, id %u",
		     (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);

	/*
	 * Look at the message flags.  If it's a query, ignore it and keep
	 * reading.
	 */
	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
		dispatch_log(disp, LVL(10), "got DNS query instead of answer");
		return (ISC_R_UNEXPECTED);
	}

	/*
	 * We have a valid response; find the associated dispentry object
	 * and call the caller back.
	 */
	bucket = dns_hash(qid, peer, id, disp->localport);

	LOCK(&qid->lock);
	resp = entry_search(qid, peer, id, disp->localport, bucket);
	if (resp != NULL) {
		if (resp->reading) {
			*respp = resp;
		} else {
			/*
			 * We already got a message for this QID and weren't
			 * expecting any more.
			 */
			result = ISC_R_UNEXPECTED;
		}
	} else {
		/* We are not expecting this DNS message */
		result = ISC_R_NOTFOUND;
	}
	/* 'bucket' is unsigned, hence %u. */
	dispatch_log(disp, LVL(90), "search for response in bucket %u: %s",
		     bucket, isc_result_totext(result));
	UNLOCK(&qid->lock);

	return (result);
}
/*
 * Schedule a callback for 'resp': take a reference on it
 * (DISPENTRY009), move it from its dispatch's active list onto
 * 'resps', mark it as no longer reading and record 'result' as the
 * result to report.  The entry must currently be reading.
 */
static void
tcp_recv_add(dns_displist_t *resps, dns_dispentry_t *resp,
	     isc_result_t result) {
	dns_dispatch_t *owner = NULL;

	dns_dispentry_ref(resp); /* DISPENTRY009 */

	owner = resp->disp;
	ISC_LIST_UNLINK(owner->active, resp, alink);
	ISC_LIST_APPEND(*resps, resp, rlink);

	INSIST(resp->reading);
	resp->reading = false;
	resp->result = result;
}
/*
 * Shut down a TCP dispatch: every entry still on the active list is
 * scheduled on 'resps' with 'result', and the dispatch is marked as
 * canceled.
 */
static void
tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps,
		  isc_result_t result) {
	dns_dispentry_t *cur = ISC_LIST_HEAD(disp->active);

	while (cur != NULL) {
		/* tcp_recv_add() unlinks 'cur', so fetch the successor first */
		dns_dispentry_t *following = ISC_LIST_NEXT(cur, alink);

		tcp_recv_add(resps, cur, result);
		cur = following;
	}

	disp->state = DNS_DISPATCHSTATE_CANCELED;
}
/*
 * Run the response callback of every entry scheduled on 'resps', in
 * list order.  Each entry is taken off the list, its callback is
 * invoked with the result recorded for it and with 'region', and the
 * reference taken when it was scheduled (DISPENTRY009) is released.
 */
static void
tcp_recv_processall(dns_displist_t *resps, isc_region_t *region) {
	dns_dispentry_t *cur = ISC_LIST_HEAD(*resps);

	while (cur != NULL) {
		dns_dispentry_t *following = ISC_LIST_NEXT(cur, rlink);

		ISC_LIST_UNLINK(*resps, cur, rlink);

		dispentry_log(cur, LVL(90), "read callback: %s",
			      isc_result_totext(cur->result));
		cur->response(cur->result, region, cur->arg);
		dns_dispentry_detach(&cur); /* DISPENTRY009 */

		cur = following;
	}
}
/*
 * Read callback for a TCP dispatch; 'arg' is the dns_dispatch_t.
 *
 * General flow (all but the last step run with disp->lock held):
 *
 * 1. On a timeout, pick the oldest active entry; on success, look up
 *    the entry the message belongs to.  The entry found, if any, is
 *    scheduled for a callback with the resulting status.
 * 2. A message that matches no entry is tolerated while earlier
 *    timeouts are still unaccounted for; otherwise it is treated as
 *    unexpected.
 * 3. Active entries whose timeout has run out are scheduled with
 *    ISC_R_TIMEDOUT.
 * 4. Any result other than success, timeout or not-found shuts the
 *    dispatch down and schedules all remaining active entries.
 * 5. Reading is resumed if active entries remain.
 * 6. The scheduled callbacks are run after the lock is released, and
 *    the dispatch reference held for the read (DISPATCH002) is
 *    dropped.
 */
static void
tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region,
void *arg) {
dns_dispatch_t *disp = (dns_dispatch_t *)arg;
dns_dispentry_t *resp = NULL;
dns_qid_t *qid = NULL;
char buf[ISC_SOCKADDR_FORMATSIZE];
isc_sockaddr_t peer;
/* Entries whose callbacks are to be run once the lock is dropped. */
dns_displist_t resps = ISC_LIST_INITIALIZER;
REQUIRE(VALID_DISPATCH(disp));
qid = disp->mgr->qid;
LOCK(&disp->lock);
INSIST(disp->reading);
disp->reading = false;
dispatch_log(disp, LVL(90), "TCP read:%s:requests %u",
isc_result_totext(result), disp->requests);
peer = isc_nmhandle_peeraddr(handle);
/*
* Phase 1: Process timeout and success.
*/
switch (result) {
case ISC_R_TIMEDOUT:
/*
* Time out the oldest response in the active queue.
*/
result = tcp_recv_oldest(disp, &resp);
break;
case ISC_R_SUCCESS:
/* We got an answer */
result = tcp_recv_success(disp, region, qid, &peer, &resp);
break;
default:
break;
}
if (resp != NULL) {
tcp_recv_add(&resps, resp, result);
}
/*
* Phase 2: Look if we timed out before.
*/
if (result == ISC_R_NOTFOUND) {
if (disp->timedout > 0) {
/* There was active query that timed-out before */
disp->timedout--;
} else {
result = ISC_R_UNEXPECTED;
}
}
/*
* Phase 3: Trigger timeouts. It's possible that the responses would
* have been timed out already, but non-matching TCP reads have
* prevented this.
*/
dns_dispentry_t *next = NULL;
for (resp = ISC_LIST_HEAD(disp->active); resp != NULL; resp = next) {
/* tcp_recv_add() unlinks 'resp', so fetch the successor first. */
next = ISC_LIST_NEXT(resp, alink);
/* FIXME: dispentry_runtime is always 0 for TCP */
int timeout = resp->timeout - dispentry_runtime(resp);
if (timeout <= 0) {
tcp_recv_add(&resps, resp, ISC_R_TIMEDOUT);
}
}
/*
* Phase 4: log if we errored out.
*/
switch (result) {
case ISC_R_SUCCESS:
case ISC_R_TIMEDOUT:
case ISC_R_NOTFOUND:
break;
case ISC_R_SHUTTINGDOWN:
case ISC_R_CANCELED:
case ISC_R_EOF:
case ISC_R_CONNECTIONRESET:
/* Expected ways for a connection to end: logged at debug level. */
isc_sockaddr_format(&peer, buf, sizeof(buf));
dispatch_log(disp, LVL(90), "shutting down TCP: %s: %s", buf,
isc_result_totext(result));
tcp_recv_shutdown(disp, &resps, result);
break;
default:
/* Anything else is reported as an error. */
isc_sockaddr_format(&peer, buf, sizeof(buf));
dispatch_log(disp, ISC_LOG_ERROR,
"shutting down due to TCP "
"receive error: %s: %s",
buf, isc_result_totext(result));
tcp_recv_shutdown(disp, &resps, result);
break;
}
/*
* Phase 5: Resume reading if there are still active responses
*/
if (!ISC_LIST_EMPTY(disp->active)) {
tcp_startrecv(disp, ISC_LIST_HEAD(disp->active));
}
UNLOCK(&disp->lock);
/*
* Phase 6: Process all scheduled callbacks.
*/
tcp_recv_processall(&resps, region);
dns_dispatch_detach(&disp); /* DISPATCH002 */
}
/*%
 * Build a port set in '*portsetp' that covers the UDP port range
 * reported by isc_net_getudpportrange() for address family 'family'.
 *
 * This provides the initial default set of dispatch ports.  It is
 * almost meaningless as the application will normally set the ports
 * explicitly, but it fills some minor corner cases.
 */
static void
create_default_portset(isc_mem_t *mctx, int family, isc_portset_t **portsetp) {
	in_port_t first, last;

	isc_net_getudpportrange(family, &first, &last);

	isc_portset_create(mctx, portsetp);
	isc_portset_addrange(*portsetp, first, last);
}
/*
 * Replace the manager's IPv4 and IPv6 port arrays with the ports that
 * are set in 'v4portset' and 'v6portset', in ascending order.  An
 * empty port set yields a NULL array with a count of zero.  Arrays
 * installed earlier are freed.
 *
 * Always returns ISC_R_SUCCESS.
 */
static isc_result_t
setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
	      isc_portset_t *v6portset) {
	const unsigned int count4 = isc_portset_nports(v4portset);
	const unsigned int count6 = isc_portset_nports(v6portset);
	in_port_t *list4 = NULL;
	in_port_t *list6 = NULL;
	unsigned int used4 = 0;
	unsigned int used6 = 0;

	if (count4 != 0) {
		list4 = isc_mem_get(mgr->mctx, sizeof(in_port_t) * count4);
	}
	if (count6 != 0) {
		list6 = isc_mem_get(mgr->mctx, sizeof(in_port_t) * count6);
	}

	/* Walk the whole 16-bit port space once, filling both arrays. */
	for (unsigned int n = 0; n <= 65535; n++) {
		const in_port_t p = (in_port_t)n;

		if (isc_portset_isset(v4portset, p)) {
			INSIST(used4 < count4);
			list4[used4++] = p;
		}
		if (isc_portset_isset(v6portset, p)) {
			INSIST(used6 < count6);
			list6[used6++] = p;
		}
	}
	INSIST(used4 == count4 && used6 == count6);

	if (mgr->v4ports != NULL) {
		isc_mem_put(mgr->mctx, mgr->v4ports,
			    mgr->nv4ports * sizeof(in_port_t));
	}
	mgr->v4ports = list4;
	mgr->nv4ports = count4;

	if (mgr->v6ports != NULL) {
		isc_mem_put(mgr->mctx, mgr->v6ports,
			    mgr->nv6ports * sizeof(in_port_t));
	}
	mgr->v6ports = list6;
	mgr->nv6ports = count6;

	return ISC_R_SUCCESS;
}
/*
 * Publics.
 */

/*
 * Create a dispatch manager in '*mgrp'.
 *
 * The manager starts with one reference, attaches to 'mctx' and 'nm',
 * gets the default UDP port ranges for IPv4 and IPv6 as its available
 * ports, and gets a freshly allocated QID table.
 *
 * Requires 'mctx' to be non-NULL and 'mgrp' to point to a NULL
 * pointer.  Always returns ISC_R_SUCCESS.
 */
isc_result_t
dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm,
		       dns_dispatchmgr_t **mgrp) {
	isc_portset_t *ports4 = NULL;
	isc_portset_t *ports6 = NULL;
	dns_dispatchmgr_t *mgr = NULL;

	REQUIRE(mctx != NULL);
	REQUIRE(mgrp != NULL && *mgrp == NULL);

	mgr = isc_mem_get(mctx, sizeof(*mgr));
	*mgr = (dns_dispatchmgr_t){ .magic = 0 };

#if DNS_DISPATCH_TRACE
	fprintf(stderr, "dns_dispatchmgr__init:%s:%s:%d:%p->references = 1\n",
		__func__, __FILE__, __LINE__, mgr);
#endif
	isc_refcount_init(&mgr->references, 1);

	isc_mem_attach(mctx, &mgr->mctx);
	isc_nm_attach(nm, &mgr->nm);

	isc_mutex_init(&mgr->lock);
	ISC_LIST_INIT(mgr->list);

	/* The port sets are only needed to seed the port arrays. */
	create_default_portset(mctx, AF_INET, &ports4);
	create_default_portset(mctx, AF_INET6, &ports6);
	(void)setavailports(mgr, ports4, ports6);
	isc_portset_destroy(mctx, &ports4);
	isc_portset_destroy(mctx, &ports6);

	qid_allocate(mgr, &mgr->qid);

	mgr->magic = DNS_DISPATCHMGR_MAGIC;
	*mgrp = mgr;

	return ISC_R_SUCCESS;
}
/*
 * Generate the reference-counting functions for dns_dispatchmgr, with
 * dispatchmgr_destroy() as the destructor.  The tracing variant is
 * used when DNS_DISPATCH_TRACE is enabled.
 */
#if DNS_DISPATCH_TRACE
ISC_REFCOUNT_TRACE_IMPL(dns_dispatchmgr, dispatchmgr_destroy);
#else
ISC_REFCOUNT_IMPL(dns_dispatchmgr, dispatchmgr_destroy);
#endif
/*
 * Set the blackhole ACL of 'mgr' to 'blackhole'.  A previously set ACL
 * is detached first; the manager then attaches to the new one.
 */
void
dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
REQUIRE(VALID_DISPATCHMGR(mgr));
/* Release the ACL that is being replaced, if any. */
if (mgr->blackhole != NULL) {
dns_acl_detach(&mgr->blackhole);
}
dns_acl_attach(blackhole, &mgr->blackhole);
}
/*
 * Return the blackhole ACL currently set on 'mgr' (NULL if there is
 * none).  No reference is taken on behalf of the caller.
 */
dns_acl_t *
dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
	dns_acl_t *acl = NULL;

	REQUIRE(VALID_DISPATCHMGR(mgr));

	acl = mgr->blackhole;
	return acl;
}
/*
 * Public wrapper around setavailports(): validate 'mgr' and replace
 * its available IPv4 and IPv6 ports with those in the given port sets.
 */
isc_result_t
dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
			      isc_portset_t *v6portset) {
	isc_result_t result;

	REQUIRE(VALID_DISPATCHMGR(mgr));

	result = setavailports(mgr, v4portset, v6portset);
	return result;
}
/*
 * Tear down and free dispatch manager 'mgr'.  This is the destructor
 * handed to the reference-counting implementation macro for
 * dns_dispatchmgr.
 *
 * The QID table, the optional blackhole ACL and statistics set, and
 * both port arrays are released before the manager itself is freed.
 */
static void
dispatchmgr_destroy(dns_dispatchmgr_t *mgr) {
REQUIRE(VALID_DISPATCHMGR(mgr));
isc_refcount_destroy(&mgr->references);
mgr->magic = 0;
isc_mutex_destroy(&mgr->lock);
qid_destroy(mgr->mctx, &mgr->qid);
if (mgr->blackhole != NULL) {
dns_acl_detach(&mgr->blackhole);
}
if (mgr->stats != NULL) {
isc_stats_detach(&mgr->stats);
}
if (mgr->v4ports != NULL) {
isc_mem_put(mgr->mctx, mgr->v4ports,
mgr->nv4ports * sizeof(in_port_t));
}
if (mgr->v6ports != NULL) {
isc_mem_put(mgr->mctx, mgr->v6ports,
mgr->nv6ports * sizeof(in_port_t));
}
isc_nm_detach(&mgr->nm);
/* Free the manager and drop its memory context in a single step. */
isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(dns_dispatchmgr_t));
}
/*
 * Attach statistics set 'stats' to 'mgr'.
 *
 * This may only be done once, and only while the manager's dispatch
 * list is empty.
 */
void
dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
REQUIRE(VALID_DISPATCHMGR(mgr));
REQUIRE(ISC_LIST_EMPTY(mgr->list));
REQUIRE(mgr->stats == NULL);
isc_stats_attach(stats, &mgr->stats);
}
/*
 * Allocate a QID table from the manager's memory context and store it
 * in '*qidp', which must point to a NULL pointer.  The table gets
 * DNS_QID_BUCKETS empty buckets and DNS_QID_INCREMENT as its
 * increment.
 */
static void
qid_allocate(dns_dispatchmgr_t *mgr, dns_qid_t **qidp) {
	dns_qid_t *fresh = NULL;

	REQUIRE(qidp != NULL && *qidp == NULL);

	fresh = isc_mem_get(mgr->mctx, sizeof(*fresh));
	*fresh = (dns_qid_t){
		.qid_nbuckets = DNS_QID_BUCKETS,
		.qid_increment = DNS_QID_INCREMENT,
	};

	fresh->qid_table = isc_mem_get(
		mgr->mctx, DNS_QID_BUCKETS * sizeof(dns_displist_t));
	for (unsigned int b = 0; b < fresh->qid_nbuckets; b++) {
		ISC_LIST_INIT(fresh->qid_table[b]);
	}

	isc_mutex_init(&fresh->lock);
	fresh->magic = QID_MAGIC;

	*qidp = fresh;
}
/*
 * Free the QID table referenced by '*qidp' back to 'mctx' and set
 * '*qidp' to NULL.  The table must be a valid QID table.
 */
static void
qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
	dns_qid_t *doomed = NULL;

	REQUIRE(qidp != NULL);

	/* Take ownership away from the caller before validating. */
	doomed = *qidp;
	*qidp = NULL;

	REQUIRE(VALID_QID(doomed));

	doomed->magic = 0;
	isc_mem_put(mctx, doomed->qid_table,
		    doomed->qid_nbuckets * sizeof(dns_displist_t));
	isc_mutex_destroy(&doomed->lock);
	isc_mem_put(mctx, doomed, sizeof(*doomed));
}
/*
 * Allocate a dispatch of socket type 'type' from the manager's memory
 * context and store it in '*dispp', which must point to a NULL
 * pointer.
 *
 * The new dispatch records the calling thread's id, holds a reference
 * on 'mgr', has empty 'pending' and 'active' lists and starts with one
 * reference (DISPATCH000).  Address fields are left for the caller to
 * fill in.
 */
static void
dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type,
		  dns_dispatch_t **dispp) {
	dns_dispatch_t *fresh = NULL;

	REQUIRE(VALID_DISPATCHMGR(mgr));
	REQUIRE(dispp != NULL && *dispp == NULL);

	/*
	 * Only the common part is set up here; whatever depends on
	 * TCP vs. UDP is left to the caller.
	 */
	fresh = isc_mem_get(mgr->mctx, sizeof(*fresh));
	*fresh = (dns_dispatch_t){
		.magic = DISPATCH_MAGIC,
		.tid = isc_tid(),
		.socktype = type,
		.link = ISC_LINK_INITIALIZER,
		.pending = ISC_LIST_INITIALIZER,
		.active = ISC_LIST_INITIALIZER,
	};

	dns_dispatchmgr_attach(mgr, &fresh->mgr);

#if DNS_DISPATCH_TRACE
	fprintf(stderr, "dns_dispatch__init:%s:%s:%d:%p->references = 1\n",
		__func__, __FILE__, __LINE__, fresh);
#endif
	isc_refcount_init(&fresh->references, 1); /* DISPATCH000 */
	isc_mutex_init(&fresh->lock);

	*dispp = fresh;
}
/*
 * Create a TCP dispatch for peer 'destaddr' and store it in '*dispp'.
 *
 * When 'localaddr' is NULL, the local address is the wildcard address
 * of the peer's address family with port 0.  The new dispatch is
 * appended to the manager's dispatch list.
 *
 * Requires 'mgr' to be valid and 'destaddr' and 'dispp' to be
 * non-NULL.  Always returns ISC_R_SUCCESS.
 */
isc_result_t
dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr,
		       const isc_sockaddr_t *destaddr, dns_dispatch_t **dispp) {
	dns_dispatch_t *disp = NULL;

	REQUIRE(VALID_DISPATCHMGR(mgr));
	REQUIRE(destaddr != NULL);
	/* '*dispp' is written unconditionally below. */
	REQUIRE(dispp != NULL);

	LOCK(&mgr->lock);

	dispatch_allocate(mgr, isc_socktype_tcp, &disp);

	disp->peer = *destaddr;

	if (localaddr != NULL) {
		disp->local = *localaddr;
	} else {
		int pf;
		pf = isc_sockaddr_pf(destaddr);
		isc_sockaddr_anyofpf(&disp->local, pf);
		isc_sockaddr_setport(&disp->local, 0);
	}

	/*
	 * Append it to the dispatcher list.
	 */

	/* FIXME: There should be a lookup hashtable here */
	ISC_LIST_APPEND(mgr->list, disp, link);
	UNLOCK(&mgr->lock);

	/* Check the same level that the mgr_log() call below logs at. */
	if (isc_log_wouldlog(dns_lctx, LVL(90))) {
		char addrbuf[ISC_SOCKADDR_FORMATSIZE];

		isc_sockaddr_format(&disp->local, addrbuf,
				    ISC_SOCKADDR_FORMATSIZE);

		mgr_log(mgr, LVL(90),
			"dns_dispatch_createtcp: created TCP dispatch %p for "
			"%s",
			disp, addrbuf);
	}
	*dispp = disp;

	return (ISC_R_SUCCESS);
}
/*
 * Look for an existing TCP dispatch to 'destaddr' (and from
 * 'localaddr', when that is non-NULL) that was created by the calling
 * thread and can be shared.
 *
 * A connected dispatch with active entries is preferred and ends the
 * search.  Failing that, the first dispatch found that is still
 * connecting and has pending entries is used.
 *
 * On success a reference to the dispatch is stored in '*dispp' and
 * ISC_R_SUCCESS is returned; otherwise ISC_R_NOTFOUND is returned and
 * '*dispp' is left untouched.
 */
isc_result_t
dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr,
const isc_sockaddr_t *localaddr, dns_dispatch_t **dispp) {
dns_dispatch_t *disp_connected = NULL;
dns_dispatch_t *disp_fallback = NULL;
isc_result_t result = ISC_R_NOTFOUND;
REQUIRE(VALID_DISPATCHMGR(mgr));
REQUIRE(destaddr != NULL);
REQUIRE(dispp != NULL && *dispp == NULL);
LOCK(&mgr->lock);
for (dns_dispatch_t *disp = ISC_LIST_HEAD(mgr->list); disp != NULL;
disp = ISC_LIST_NEXT(disp, link))
{
isc_sockaddr_t sockname;
isc_sockaddr_t peeraddr;
LOCK(&disp->lock);
/* Only dispatches created by the calling thread are considered. */
if (disp->tid != isc_tid()) {
UNLOCK(&disp->lock);
continue;
}
/* Use the addresses of the live handle when there is one. */
if (disp->handle != NULL) {
sockname = isc_nmhandle_localaddr(disp->handle);
peeraddr = isc_nmhandle_peeraddr(disp->handle);
} else {
sockname = disp->local;
peeraddr = disp->peer;
}
/*
* The conditions match:
* 1. socktype is TCP
* 2. destination address is same
* 3. local address is either NULL or same
*/
if (disp->socktype != isc_socktype_tcp ||
!isc_sockaddr_equal(destaddr, &peeraddr) ||
(localaddr != NULL &&
!isc_sockaddr_eqaddr(localaddr, &sockname)))
{
UNLOCK(&disp->lock);
continue;
}
switch (disp->state) {
case DNS_DISPATCHSTATE_NONE:
/* A dispatch in indeterminate state, skip it */
break;
case DNS_DISPATCHSTATE_CONNECTED:
if (ISC_LIST_EMPTY(disp->active)) {
/* Ignore dispatch with no responses */
break;
}
/* We found a connected dispatch */
dns_dispatch_attach(disp, &disp_connected);
break;
case DNS_DISPATCHSTATE_CONNECTING:
if (ISC_LIST_EMPTY(disp->pending)) {
/* Ignore dispatch with no responses */
break;
}
/* We found "a" dispatch, store it for later */
if (disp_fallback == NULL) {
dns_dispatch_attach(disp, &disp_fallback);
}
break;
case DNS_DISPATCHSTATE_CANCELED:
/* A canceled dispatch, skip it. */
break;
default:
UNREACHABLE();
}
UNLOCK(&disp->lock);
/* A connected dispatch is the best match; stop searching. */
if (disp_connected != NULL) {
break;
}
}
if (disp_connected != NULL) {
/* We found connected dispatch */
INSIST(disp_connected->handle != NULL);
/* Hand the reference taken above over to the caller. */
*dispp = disp_connected;
disp_connected = NULL;
result = ISC_R_SUCCESS;
/* The fallback is not needed; drop the reference taken on it. */
if (disp_fallback != NULL) {
dns_dispatch_detach(&disp_fallback);
}
} else if (disp_fallback != NULL) {
/* Hand the reference taken above over to the caller. */
*dispp = disp_fallback;
result = ISC_R_SUCCESS;
}
UNLOCK(&mgr->lock);
return (result);
}
/*
 * Create a UDP dispatch bound to 'localaddr' and store it in '*dispp'.
 *
 * This is a wrapper that runs dispatch_createudp() with the manager
 * lock held.  '*dispp' is only written on success; otherwise the
 * result of dispatch_createudp() is returned and '*dispp' stays NULL.
 */
isc_result_t
dns_dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr,
		       dns_dispatch_t **dispp) {
	dns_dispatch_t *created = NULL;
	isc_result_t result;

	REQUIRE(VALID_DISPATCHMGR(mgr));
	REQUIRE(localaddr != NULL);
	REQUIRE(dispp != NULL && *dispp == NULL);

	LOCK(&mgr->lock);
	result = dispatch_createudp(mgr, localaddr, &created);
	if (result == ISC_R_SUCCESS) {
		*dispp = created;
	}
	UNLOCK(&mgr->lock);

	return result;
}
/*
 * Allocate a UDP dispatch whose local address is 'localaddr'.
 *
 * Unless 'localaddr' is the wildcard address of its protocol family, it is
 * first checked with isc_nm_checkaddr() and any failure is returned
 * unchanged.  The callers in this file invoke this with mgr->lock held.
 */
static isc_result_t
dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr,
		   dns_dispatch_t **dispp) {
	dns_dispatch_t *newdisp = NULL;
	isc_sockaddr_t wildcard;

	/*
	 * A specific (non-wildcard) address has to be usable locally.
	 */
	isc_sockaddr_anyofpf(&wildcard, isc_sockaddr_pf(localaddr));
	if (!isc_sockaddr_eqaddr(&wildcard, localaddr)) {
		isc_result_t check = isc_nm_checkaddr(localaddr,
						      isc_socktype_udp);
		if (check != ISC_R_SUCCESS) {
			return (check);
		}
	}

	dispatch_allocate(mgr, isc_socktype_udp, &newdisp);

	if (isc_log_wouldlog(dns_lctx, 90)) {
		char text[ISC_SOCKADDR_FORMATSIZE];

		isc_sockaddr_format(localaddr, text, sizeof(text));
		mgr_log(mgr, LVL(90),
			"dispatch_createudp: created UDP dispatch %p for %s",
			newdisp, text);
	}

	newdisp->local = *localaddr;

	/*
	 * UDP dispatches are deliberately not linked into mgr->list:
	 * only TCP dispatches are ever searched for there.
	 */
	*dispp = newdisp;

	return (ISC_R_SUCCESS);
}
/*
 * Final teardown of a dispatch; passed to the ISC_REFCOUNT_*IMPL macro
 * as the destroy function for dns_dispatch.
 *
 * The dispatch is unlinked from its manager's list (if linked), sanity
 * checked for leftover requests/entries, its TCP handle (if any) is
 * detached, and its memory is returned to the manager's memory context.
 */
static void
dispatch_destroy(dns_dispatch_t *disp) {
	dns_dispatchmgr_t *owner = disp->mgr;

	isc_refcount_destroy(&disp->references);
	disp->magic = 0;

	/* Drop the dispatch from the manager's list, if it is on it. */
	LOCK(&owner->lock);
	if (ISC_LINK_LINKED(disp, link)) {
		ISC_LIST_UNLINK(owner->list, disp, link);
	}
	UNLOCK(&owner->lock);

	/* Nothing may still be queued on, or referring to, this dispatch. */
	INSIST(disp->requests == 0);
	INSIST(ISC_LIST_EMPTY(disp->pending));
	INSIST(ISC_LIST_EMPTY(disp->active));
	INSIST(!ISC_LINK_LINKED(disp, link));

	dispatch_log(disp, LVL(90), "destroying dispatch %p", disp);

	if (disp->handle != NULL) {
		dispatch_log(disp, LVL(90), "detaching TCP handle %p from %p",
			     disp->handle, &disp->handle);
		isc_nmhandle_detach(&disp->handle);
	}

	isc_mutex_destroy(&disp->lock);
	isc_mem_put(owner->mctx, disp, sizeof(*disp));

	/*
	 * The dispatch was allocated from the manager's memory context, so
	 * the manager reference can only be released once the dispatch
	 * itself has been freed.
	 */
	dns_dispatchmgr_detach(&owner);
}
/*
 * Instantiate the reference-counting implementation for dns_dispatch,
 * with dispatch_destroy() as the destroy function.  The tracing variant
 * is selected when DNS_DISPATCH_TRACE is enabled.
 * NOTE(review): presumably this expands to the dns_dispatch_ref/unref/
 * attach/detach functions used throughout this file -- confirm against
 * the macro definitions in <isc/refcount.h>.
 */
#if DNS_DISPATCH_TRACE
ISC_REFCOUNT_TRACE_IMPL(dns_dispatch, dispatch_destroy);
#else
ISC_REFCOUNT_IMPL(dns_dispatch, dispatch_destroy);
#endif
/*
 * Add a response entry for a query to 'dest' on dispatch 'disp'.
 *
 * A new dns_dispentry_t is allocated, given a message ID and registered in
 * the manager's QID table.  For UDP dispatches a socket is set up for the
 * entry first (setup_socket()).  The entry holds a reference on 'disp' and,
 * when supplied, on 'transport' and 'tlsctx_cache'.
 *
 * If DNS_DISPATCHOPT_FIXEDID is set in 'options', the ID in '*idp' is used
 * as-is and the call fails if it is already in use for the same
 * destination/port; otherwise a random starting ID is chosen and a bounded
 * number of alternatives is tried on collision.
 *
 * Returns:
 *	ISC_R_SUCCESS	'*respp' is the new entry and '*idp' its message ID
 *	ISC_R_CANCELED	the dispatch has already been canceled
 *	ISC_R_NOMORE	no unused message ID could be found
 *	other		failure reported by setup_socket() (UDP only)
 */
isc_result_t
dns_dispatch_add(dns_dispatch_t *disp, unsigned int options,
		 unsigned int timeout, const isc_sockaddr_t *dest,
		 dns_transport_t *transport, isc_tlsctx_cache_t *tlsctx_cache,
		 dispatch_cb_t connected, dispatch_cb_t sent,
		 dispatch_cb_t response, void *arg, dns_messageid_t *idp,
		 dns_dispentry_t **respp) {
	dns_dispentry_t *resp = NULL;
	dns_qid_t *qid = NULL;
	in_port_t dispport, localport = 0;
	dns_messageid_t id;
	unsigned int bucket;
	bool ok = false;
	int i = 0;

	REQUIRE(VALID_DISPATCH(disp));
	REQUIRE(dest != NULL);
	REQUIRE(respp != NULL && *respp == NULL);
	REQUIRE(idp != NULL);
	REQUIRE(disp->socktype == isc_socktype_tcp ||
		disp->socktype == isc_socktype_udp);
	REQUIRE(connected != NULL);
	REQUIRE(response != NULL);
	REQUIRE(sent != NULL);

	LOCK(&disp->lock);

	if (disp->state == DNS_DISPATCHSTATE_CANCELED) {
		UNLOCK(&disp->lock);
		return (ISC_R_CANCELED);
	}

	qid = disp->mgr->qid;

	/* Use the dispatch's own local port when it has a fixed one. */
	dispport = isc_sockaddr_getport(&disp->local);
	if (dispport != 0) {
		localport = dispport;
	}

	resp = isc_mem_get(disp->mgr->mctx, sizeof(*resp));
	*resp = (dns_dispentry_t){
		.port = localport,
		.timeout = timeout,
		.peer = *dest,
		.connected = connected,
		.sent = sent,
		.response = response,
		.arg = arg,
		.link = ISC_LINK_INITIALIZER,
		.alink = ISC_LINK_INITIALIZER,
		.plink = ISC_LINK_INITIALIZER,
		.rlink = ISC_LINK_INITIALIZER,
		.magic = RESPONSE_MAGIC,
	};

#if DNS_DISPATCH_TRACE
	/* Trace the initial reference of the freshly created entry. */
	fprintf(stderr, "dns_dispentry__init:%s:%s:%d:%p->references = 1\n",
		__func__, __FILE__, __LINE__, resp);
#endif
	isc_refcount_init(&resp->references, 1); /* DISPENTRY000 */

	if (disp->socktype == isc_socktype_udp) {
		isc_result_t result = setup_socket(disp, resp, dest,
						   &localport);
		if (result != ISC_R_SUCCESS) {
			isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp));
			UNLOCK(&disp->lock);
			inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
			return (result);
		}
	}

	/*
	 * Try somewhat hard to find a unique ID. Start with
	 * a random number unless DNS_DISPATCHOPT_FIXEDID is set,
	 * in which case we start with the ID passed in via *idp.
	 */
	if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) {
		id = *idp;
	} else {
		id = (dns_messageid_t)isc_random16();
	}

	LOCK(&qid->lock);
	do {
		dns_dispentry_t *entry = NULL;

		bucket = dns_hash(qid, dest, id, localport);
		entry = entry_search(qid, dest, id, localport, bucket);
		if (entry == NULL) {
			ok = true;
			break;
		}
		if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) {
			/* When using fixed ID, we either must use it or fail */
			break;
		}
		/* Collision: step to the next candidate, wrapping at 16 bits. */
		id += qid->qid_increment;
		id &= 0x0000ffff;
	} while (i++ < 64);

	if (ok) {
		resp->id = id;
		resp->bucket = bucket;
		ISC_LIST_APPEND(qid->qid_table[bucket], resp, link);
	}
	UNLOCK(&qid->lock);

	if (!ok) {
		isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp));
		UNLOCK(&disp->lock);
		return (ISC_R_NOMORE);
	}

	if (transport != NULL) {
		dns_transport_attach(transport, &resp->transport);
	}

	if (tlsctx_cache != NULL) {
		isc_tlsctx_cache_attach(tlsctx_cache, &resp->tlsctx_cache);
	}

	dns_dispatch_attach(disp, &resp->disp); /* DISPATCH001 */

	disp->requests++;

	inc_stats(disp->mgr, (disp->socktype == isc_socktype_udp)
				     ? dns_resstatscounter_disprequdp
				     : dns_resstatscounter_dispreqtcp);

	UNLOCK(&disp->lock);

	*idp = id;
	*respp = resp;

	return (ISC_R_SUCCESS);
}
/*
 * Resume reading for 'resp' so that a further response can be received.
 *
 * For UDP the read is restarted with whatever is left of the entry's
 * timeout; ISC_R_TIMEDOUT is returned when nothing is left.  For TCP the
 * read is continued without changing the timeout.
 */
isc_result_t
dns_dispatch_getnext(dns_dispentry_t *resp) {
	REQUIRE(VALID_RESPONSE(resp));
	REQUIRE(VALID_DISPATCH(resp->disp));

	dns_dispatch_t *disp = resp->disp;
	isc_result_t status = ISC_R_SUCCESS;

	dispentry_log(resp, LVL(90), "getnext for QID %d", resp->id);

	LOCK(&disp->lock);
	switch (disp->socktype) {
	case isc_socktype_tcp:
		/* A negative timeout leaves the handle's timeout alone. */
		tcp_dispatch_getnext(disp, resp, -1);
		break;
	case isc_socktype_udp: {
		int32_t remaining = resp->timeout - dispentry_runtime(resp);

		if (remaining > 0) {
			udp_dispatch_getnext(resp, remaining);
		} else {
			status = ISC_R_TIMEDOUT;
		}
		break;
	}
	default:
		UNREACHABLE();
	}
	UNLOCK(&disp->lock);

	return (status);
}
/*
 * Cancel a response entry that belongs to a UDP dispatch.
 *
 * With disp->lock held, the entry is taken off the active list and, unless
 * it was already canceled, any read in progress is canceled, the UDP
 * request statistic is decremented, the entry is removed from the QID
 * table and it is marked DNS_DISPATCHSTATE_CANCELED.  If a read was
 * canceled, the response callback is invoked with 'result' once the lock
 * has been released.
 */
static void
udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) {
	REQUIRE(VALID_RESPONSE(resp));
	REQUIRE(VALID_DISPATCH(resp->disp));
	REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr));

	dns_dispatch_t *disp = resp->disp;
	dns_qid_t *idtable = disp->mgr->qid;
	bool notify = false;

	LOCK(&disp->lock);

	dispentry_log(resp, LVL(90),
		      "canceling response: %s, %s/%s (%s/%s), "
		      "requests %u",
		      isc_result_totext(result), state2str(resp->state),
		      resp->reading ? "reading" : "not reading",
		      state2str(disp->state),
		      disp->reading ? "reading" : "not reading",
		      disp->requests);

	if (ISC_LINK_LINKED(resp, alink)) {
		ISC_LIST_UNLINK(disp->active, resp, alink);
	}

	/* An entry that is already canceled needs no further work. */
	if (resp->state != DNS_DISPATCHSTATE_CANCELED) {
		switch (resp->state) {
		case DNS_DISPATCHSTATE_NONE:
		case DNS_DISPATCHSTATE_CONNECTING:
			break;
		case DNS_DISPATCHSTATE_CONNECTED:
			if (resp->reading) {
				notify = true;
				dispentry_log(resp, LVL(90),
					      "canceling read on %p",
					      resp->handle);
				isc_nm_cancelread(resp->handle);
			}
			break;
		default:
			UNREACHABLE();
		}

		dec_stats(disp->mgr, dns_resstatscounter_disprequdp);

		LOCK(&idtable->lock);
		ISC_LIST_UNLINK(idtable->qid_table[resp->bucket], resp, link);
		UNLOCK(&idtable->lock);

		resp->state = DNS_DISPATCHSTATE_CANCELED;
	}

	UNLOCK(&disp->lock);

	/* The callback is made only after the dispatch lock is dropped. */
	if (notify) {
		dispentry_log(resp, LVL(90), "read callback: %s",
			      isc_result_totext(result));
		resp->response(result, NULL, resp->arg);
	}
}
/*
 * Cancel a response entry that belongs to a TCP dispatch.
 *
 * With disp->lock held: a connected entry that is still reading is handed
 * to tcp_recv_add() with ISC_R_CANCELED, and if no active entries remain
 * on the dispatch the read on the shared TCP handle is canceled (or, when
 * DISPATCH_TCP_KEEPALIVE is enabled, the handle is given a final one
 * second timeout instead).  Unless the entry was already canceled, the TCP
 * request statistic is decremented, the entry is removed from the QID
 * table and it is marked DNS_DISPATCHSTATE_CANCELED.  The collected list
 * is passed to tcp_recv_processall() after the lock has been released.
 *
 * Note that 'result' is only used in the log message here; the status
 * passed to tcp_recv_add() is always ISC_R_CANCELED.
 */
static void
tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) {
REQUIRE(VALID_RESPONSE(resp));
REQUIRE(VALID_DISPATCH(resp->disp));
REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr));
dns_dispatch_t *disp = resp->disp;
dns_dispatchmgr_t *mgr = disp->mgr;
dns_qid_t *qid = mgr->qid;
/* Entries collected under the lock, processed after unlocking. */
dns_displist_t resps = ISC_LIST_INITIALIZER;
LOCK(&disp->lock);
dispentry_log(resp, LVL(90),
"canceling response: %s, %s/%s (%s/%s), "
"requests %u",
isc_result_totext(result), state2str(resp->state),
resp->reading ? "reading" : "not reading",
state2str(disp->state),
disp->reading ? "reading" : "not reading",
disp->requests);
switch (resp->state) {
case DNS_DISPATCHSTATE_NONE:
break;
case DNS_DISPATCHSTATE_CONNECTING:
break;
case DNS_DISPATCHSTATE_CONNECTED:
if (resp->reading) {
tcp_recv_add(&resps, resp, ISC_R_CANCELED);
}
/* Whether or not it was reading, it must be off the active list now. */
INSIST(!ISC_LINK_LINKED(resp, alink));
/* This was the last active entry on the connection. */
if (ISC_LIST_EMPTY(disp->active)) {
INSIST(disp->handle != NULL);
#if DISPATCH_TCP_KEEPALIVE
/*
* This is an experimental code that keeps the TCP
* connection open for 1 second before it is finally
* closed. By keeping the TCP connection open, it can
* be reused by dns_request that uses
* dns_dispatch_gettcp() to join existing TCP
* connections.
*
* It is disabled for now, because it changes the
* behaviour, but I am keeping the code here for future
* reference when we improve the dns_dispatch to reuse
* the TCP connections also in the resolver.
*
* The TCP connection reuse should be seamless and not
* require any extra handling on the client side though.
*/
isc_nmhandle_cleartimeout(disp->handle);
isc_nmhandle_settimeout(disp->handle, 1000);
if (!disp->reading) {
dispentry_log(resp, LVL(90),
"final 1 second timeout on %p",
disp->handle);
tcp_startrecv(disp, NULL);
}
#else
if (disp->reading) {
dispentry_log(resp, LVL(90),
"canceling read on %p",
disp->handle);
isc_nm_cancelread(disp->handle);
}
#endif
}
break;
case DNS_DISPATCHSTATE_CANCELED:
/* Already canceled: skip the statistics and QID table updates. */
goto unlock;
default:
UNREACHABLE();
}
dec_stats(disp->mgr, dns_resstatscounter_dispreqtcp);
/* The QID table has its own lock, taken inside the dispatch lock. */
LOCK(&qid->lock);
ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link);
UNLOCK(&qid->lock);
resp->state = DNS_DISPATCHSTATE_CANCELED;
unlock:
UNLOCK(&disp->lock);
/*
 * NOTE: Calling the response callback directly from here should be done
 * asynchronously, as the dns_dispatch_done() is usually called directly
 * from the response callback, so there's a slight chance that the call
 * stack will get higher here, but it's mitigated by the ".reading"
 * flag, so we don't ever go into a loop.
 */
tcp_recv_processall(&resps, NULL);
}
/*
 * Cancel 'resp', dispatching to the UDP or TCP specific implementation
 * according to the socket type of the dispatch the entry belongs to.
 */
static void
dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) {
	REQUIRE(VALID_RESPONSE(resp));
	REQUIRE(VALID_DISPATCH(resp->disp));

	dns_dispatch_t *owner = resp->disp;

	if (owner->socktype == isc_socktype_udp) {
		udp_dispentry_cancel(resp, result);
	} else if (owner->socktype == isc_socktype_tcp) {
		tcp_dispentry_cancel(resp, result);
	} else {
		UNREACHABLE();
	}
}
/*
 * Finish with a response entry: '*respp' is cleared, the entry is canceled
 * with ISC_R_CANCELED, and the caller's reference to it is released.
 */
void
dns_dispatch_done(dns_dispentry_t **respp) {
	dns_dispentry_t *entry = NULL;

	REQUIRE(VALID_RESPONSE(*respp));

	/* Take the pointer over from the caller before tearing down. */
	entry = *respp;
	*respp = NULL;

	dispentry_cancel(entry, ISC_R_CANCELED);
	dns_dispentry_detach(&entry); /* DISPENTRY000 */
}
/*
 * Begin reading on a freshly connected UDP 'handle' for 'resp'.
 *
 * The start time of the entry is recorded, the handle is attached to the
 * entry, and a reference on the entry is taken for the read before the
 * read is issued with udp_recv() as its callback.
 */
static void
udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp) {
	REQUIRE(VALID_RESPONSE(resp));

	/* Record when this entry started waiting for a response. */
	resp->start = isc_time_now();

	dispentry_log(resp, LVL(90), "attaching handle %p to %p", handle,
		      &resp->handle);
	isc_nmhandle_attach(handle, &resp->handle);

	/* Reference taken on behalf of the outstanding read. */
	dns_dispentry_ref(resp); /* DISPENTRY003 */

	dispentry_log(resp, LVL(90), "reading");
	isc_nm_read(resp->handle, udp_recv, resp);
	resp->reading = true;
}
/*
 * Begin reading on the TCP handle of 'disp', with tcp_recv() as the read
 * callback.  A reference on the dispatch is taken for the read.  'resp'
 * is used for logging only and may be NULL.
 */
static void
tcp_startrecv(dns_dispatch_t *disp, dns_dispentry_t *resp) {
	REQUIRE(VALID_DISPATCH(disp));
	REQUIRE(disp->socktype == isc_socktype_tcp);

	/* Reference taken on behalf of the outstanding read. */
	dns_dispatch_ref(disp); /* DISPATCH002 */

	if (resp == NULL) {
		dispatch_log(disp, LVL(90),
			     "TCP reading without response from %p",
			     disp->handle);
	} else {
		dispentry_log(resp, LVL(90), "reading from %p", disp->handle);
	}

	isc_nm_read(disp->handle, tcp_recv, disp);
	disp->reading = true;
}
/*
 * Connect callback for a TCP dispatch; 'arg' is the dns_dispatch_t that
 * tcp_dispatch_connect() passed to isc_nm_streamdnsconnect().
 *
 * With disp->lock held, every entry on disp->pending is moved to a local
 * list and given a per-entry result: ISC_R_CANCELED if the entry was
 * canceled while connecting, otherwise 'eresult'.  On success the entry
 * becomes CONNECTED, is appended to disp->active and flagged as reading.
 * The dispatch state is then updated and, on success, 'handle' is attached
 * to the dispatch and reading is started.
 *
 * After the lock is released, the connected callback of each collected
 * entry is invoked and the entry reference taken in tcp_dispatch_connect()
 * (DISPENTRY005) is dropped; finally the dispatch reference taken for the
 * connect (DISPATCH003) is dropped.
 */
static void
tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) {
dns_dispatch_t *disp = (dns_dispatch_t *)arg;
dns_dispentry_t *resp = NULL;
dns_dispentry_t *next = NULL;
/* Entries whose connected callback must run once the lock is dropped. */
dns_displist_t resps = ISC_LIST_INITIALIZER;
if (isc_log_wouldlog(dns_lctx, 90)) {
char localbuf[ISC_SOCKADDR_FORMATSIZE];
char peerbuf[ISC_SOCKADDR_FORMATSIZE];
/* Without a handle, fall back to the addresses stored in 'disp'. */
if (handle != NULL) {
isc_sockaddr_t local = isc_nmhandle_localaddr(handle);
isc_sockaddr_t peer = isc_nmhandle_peeraddr(handle);
isc_sockaddr_format(&local, localbuf,
ISC_SOCKADDR_FORMATSIZE);
isc_sockaddr_format(&peer, peerbuf,
ISC_SOCKADDR_FORMATSIZE);
} else {
isc_sockaddr_format(&disp->local, localbuf,
ISC_SOCKADDR_FORMATSIZE);
isc_sockaddr_format(&disp->peer, peerbuf,
ISC_SOCKADDR_FORMATSIZE);
}
dispatch_log(disp, LVL(90), "connected from %s to %s: %s",
localbuf, peerbuf, isc_result_totext(eresult));
}
LOCK(&disp->lock);
INSIST(disp->state == DNS_DISPATCHSTATE_CONNECTING);
/*
 * If there are pending responses, call the connect
 * callbacks for all of them.
 */
for (resp = ISC_LIST_HEAD(disp->pending); resp != NULL; resp = next) {
/* Fetch the successor first; 'resp' is unlinked just below. */
next = ISC_LIST_NEXT(resp, plink);
ISC_LIST_UNLINK(disp->pending, resp, plink);
ISC_LIST_APPEND(resps, resp, rlink);
resp->result = eresult;
if (resp->state == DNS_DISPATCHSTATE_CANCELED) {
resp->result = ISC_R_CANCELED;
} else if (eresult == ISC_R_SUCCESS) {
resp->state = DNS_DISPATCHSTATE_CONNECTED;
ISC_LIST_APPEND(disp->active, resp, alink);
resp->reading = true;
dispentry_log(resp, LVL(90), "start reading");
} else {
resp->state = DNS_DISPATCHSTATE_NONE;
}
}
/*
 * The loop above only appends to disp->active when 'eresult' is
 * ISC_R_SUCCESS, so a failed connect with nothing else active also
 * ends up in the CANCELED branch below.
 * NOTE(review): confirm whether the final else
 * (DNS_DISPATCHSTATE_NONE) is reachable in practice.
 */
if (ISC_LIST_EMPTY(disp->active)) {
/* All responses have been canceled */
disp->state = DNS_DISPATCHSTATE_CANCELED;
} else if (eresult == ISC_R_SUCCESS) {
disp->state = DNS_DISPATCHSTATE_CONNECTED;
isc_nmhandle_attach(handle, &disp->handle);
/*
 * NOTE(review): 'resp' is always NULL here because the loop above
 * runs until resp == NULL, so tcp_startrecv() takes its
 * "without response" logging branch.
 */
tcp_startrecv(disp, resp);
} else {
disp->state = DNS_DISPATCHSTATE_NONE;
}
UNLOCK(&disp->lock);
/* Run the connected callbacks without holding the dispatch lock. */
for (resp = ISC_LIST_HEAD(resps); resp != NULL; resp = next) {
next = ISC_LIST_NEXT(resp, rlink);
ISC_LIST_UNLINK(resps, resp, rlink);
dispentry_log(resp, LVL(90), "connect callback: %s",
isc_result_totext(resp->result));
resp->connected(resp->result, NULL, resp->arg);
dns_dispentry_detach(&resp); /* DISPENTRY005 */
}
dns_dispatch_detach(&disp); /* DISPATCH003 */
}
/*
 * Forward declaration: udp_connected() below calls udp_dispatch_connect()
 * to retry the connect after ISC_R_ADDRINUSE, and the definition of
 * udp_dispatch_connect() follows udp_connected().
 */
static void
udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp);
/*
 * Connect callback for a UDP response entry; 'arg' is the dns_dispentry_t
 * that udp_dispatch_connect() passed to isc_nm_udpconnect().
 *
 * The entry is removed from disp->pending.  If it was canceled while
 * connecting, the connected callback is invoked with ISC_R_CANCELED.
 * Otherwise:
 *   - on ISC_R_SUCCESS the entry becomes CONNECTED and reading is started
 *     on 'handle';
 *   - on ISC_R_ADDRINUSE a new socket is set up with setup_socket() and,
 *     if that succeeds, the connect is retried without invoking the
 *     connected callback;
 *   - on ISC_R_CANCELED the state is left unchanged;
 *   - on any other result the entry is reset to DNS_DISPATCHSTATE_NONE.
 * Except on the retry path, the connected callback is invoked with the
 * final result after disp->lock has been released.  The entry reference
 * taken in udp_dispatch_connect() (DISPENTRY004) is dropped on every path.
 */
static void
udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) {
dns_dispentry_t *resp = (dns_dispentry_t *)arg;
dns_dispatch_t *disp = resp->disp;
dispentry_log(resp, LVL(90), "connected: %s",
isc_result_totext(eresult));
LOCK(&disp->lock);
switch (resp->state) {
case DNS_DISPATCHSTATE_CANCELED:
/* Canceled while connecting: report that instead of 'eresult'. */
eresult = ISC_R_CANCELED;
ISC_LIST_UNLINK(disp->pending, resp, plink);
goto unlock;
case DNS_DISPATCHSTATE_CONNECTING:
ISC_LIST_UNLINK(disp->pending, resp, plink);
break;
default:
UNREACHABLE();
}
switch (eresult) {
case ISC_R_CANCELED:
break;
case ISC_R_SUCCESS:
resp->state = DNS_DISPATCHSTATE_CONNECTED;
udp_startrecv(handle, resp);
break;
case ISC_R_ADDRINUSE: {
in_port_t localport = 0;
isc_result_t result;
/* probably a port collision; try a different one */
result = setup_socket(disp, resp, &resp->peer, &localport);
if (result == ISC_R_SUCCESS) {
/*
 * udp_dispatch_connect() takes disp->lock itself and takes a new
 * entry reference, so unlock first and then drop only the
 * reference that belonged to this (failed) attempt.
 */
UNLOCK(&disp->lock);
udp_dispatch_connect(disp, resp);
goto detach;
}
resp->state = DNS_DISPATCHSTATE_NONE;
break;
}
default:
resp->state = DNS_DISPATCHSTATE_NONE;
break;
}
unlock:
UNLOCK(&disp->lock);
dispentry_log(resp, LVL(90), "connect callback: %s",
isc_result_totext(eresult));
resp->connected(eresult, NULL, resp->arg);
detach:
dns_dispentry_detach(&resp); /* DISPENTRY004 */
}
/*
 * Start an asynchronous UDP connect for 'resp'.
 *
 * With disp->lock held, the entry is marked CONNECTING, a reference is
 * taken on it for the connect callback, and it is queued on disp->pending.
 * The connect itself is issued after the lock has been released, with
 * udp_connected() as its callback.
 */
static void
udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) {
	LOCK(&disp->lock);
	resp->state = DNS_DISPATCHSTATE_CONNECTING;
	/* Released again by udp_connected(). */
	dns_dispentry_ref(resp); /* DISPENTRY004 */
	ISC_LIST_APPEND(disp->pending, resp, plink);
	UNLOCK(&disp->lock);

	isc_nm_udpconnect(disp->mgr->nm, &resp->local, &resp->peer,
			  udp_connected, resp, resp->timeout);
}
/*
 * Connect 'resp' over the TCP dispatch 'disp'.
 *
 * When the entry's transport is TLS, the TLS context and client session
 * cache are fetched first; a failure there is returned to the caller
 * before any state is changed.  What happens next depends on the state of
 * the dispatch:
 *
 *   NONE        the dispatch and the entry become CONNECTING, the entry is
 *               queued on disp->pending and a new connection is started
 *               (tcp_connected() is the connect callback);
 *   CONNECTING  the entry is queued on disp->pending and is handled when
 *               the connection already in progress completes;
 *   CONNECTED   the entry joins disp->active, reading is (re)started if
 *               necessary, and the entry's connected callback is invoked
 *               immediately with ISC_R_SUCCESS.
 *
 * Returns ISC_R_SUCCESS, or the failure from dns_transport_get_tlsctx().
 */
static isc_result_t
tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) {
	dns_transport_type_t transport_type = DNS_TRANSPORT_TCP;
	isc_tlsctx_t *tlsctx = NULL;
	isc_tlsctx_client_session_cache_t *sess_cache = NULL;

	if (resp->transport != NULL) {
		transport_type = dns_transport_get_type(resp->transport);
	}

	if (transport_type == DNS_TRANSPORT_TLS) {
		isc_result_t result;

		result = dns_transport_get_tlsctx(
			resp->transport, &resp->peer, resp->tlsctx_cache,
			resp->disp->mgr->mctx, &tlsctx, &sess_cache);

		if (result != ISC_R_SUCCESS) {
			return (result);
		}
		INSIST(tlsctx != NULL);
	}

	/* Check whether the dispatch is already connecting or connected. */
	LOCK(&disp->lock);
	switch (disp->state) {
	case DNS_DISPATCHSTATE_NONE:
		/* First connection, continue with connecting */
		disp->state = DNS_DISPATCHSTATE_CONNECTING;
		resp->state = DNS_DISPATCHSTATE_CONNECTING;
		dns_dispentry_ref(resp); /* DISPENTRY005 */
		ISC_LIST_APPEND(disp->pending, resp, plink);
		UNLOCK(&disp->lock);

		/* Released again by tcp_connected(). */
		dns_dispatch_ref(disp); /* DISPATCH003 */

		/*
		 * Only format the addresses when the message would
		 * actually be logged, as is done elsewhere in this file.
		 */
		if (isc_log_wouldlog(dns_lctx, 90)) {
			char localbuf[ISC_SOCKADDR_FORMATSIZE];
			char peerbuf[ISC_SOCKADDR_FORMATSIZE];

			isc_sockaddr_format(&disp->local, localbuf,
					    ISC_SOCKADDR_FORMATSIZE);
			isc_sockaddr_format(&disp->peer, peerbuf,
					    ISC_SOCKADDR_FORMATSIZE);
			dispentry_log(resp, LVL(90),
				      "connecting from %s to %s, timeout %u",
				      localbuf, peerbuf, resp->timeout);
		}

		isc_nm_streamdnsconnect(disp->mgr->nm, &disp->local,
					&disp->peer, tcp_connected, disp,
					resp->timeout, tlsctx, sess_cache);
		break;

	case DNS_DISPATCHSTATE_CONNECTING:
		/* Connection pending; add resp to the list */
		resp->state = DNS_DISPATCHSTATE_CONNECTING;
		dns_dispentry_ref(resp); /* DISPENTRY005 */
		ISC_LIST_APPEND(disp->pending, resp, plink);
		UNLOCK(&disp->lock);
		break;

	case DNS_DISPATCHSTATE_CONNECTED:
		resp->state = DNS_DISPATCHSTATE_CONNECTED;

		/* Add the resp to the reading list */
		ISC_LIST_APPEND(disp->active, resp, alink);
		dispentry_log(resp, LVL(90), "already connected; attaching");
		resp->reading = true;

		if (!disp->reading) {
			/* Restart the reading */
			tcp_startrecv(disp, resp);
		}

		UNLOCK(&disp->lock);

		/* We are already connected; call the connected cb */
		dispentry_log(resp, LVL(90), "connect callback: %s",
			      isc_result_totext(ISC_R_SUCCESS));
		resp->connected(ISC_R_SUCCESS, NULL, resp->arg);
		break;

	default:
		UNREACHABLE();
	}

	return (ISC_R_SUCCESS);
}
/*
 * Connect the response entry 'resp' using the transport of its dispatch.
 *
 * For TCP the result of tcp_dispatch_connect() is returned.  For UDP the
 * connect is started asynchronously and ISC_R_SUCCESS is returned.
 */
isc_result_t
dns_dispatch_connect(dns_dispentry_t *resp) {
	REQUIRE(VALID_RESPONSE(resp));
	REQUIRE(VALID_DISPATCH(resp->disp));

	dns_dispatch_t *owner = resp->disp;

	if (owner->socktype == isc_socktype_tcp) {
		return (tcp_dispatch_connect(owner, resp));
	}

	if (owner->socktype == isc_socktype_udp) {
		udp_dispatch_connect(owner, resp);
		return (ISC_R_SUCCESS);
	}

	UNREACHABLE();
}
/*
 * Send-completion callback installed by dns_dispatch_send(); 'cbarg' is
 * the response entry the send was issued for.
 *
 * The entry's 'sent' callback is always invoked with 'result'; a failed
 * send additionally cancels the entry.  The entry reference and the handle
 * reference taken for the send are then released.
 */
static void
send_done(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
	dns_dispentry_t *entry = cbarg;

	REQUIRE(VALID_RESPONSE(entry));
	REQUIRE(VALID_DISPATCH(entry->disp));

	dispentry_log(entry, LVL(90), "sent: %s", isc_result_totext(result));

	entry->sent(result, NULL, entry->arg);

	if (result != ISC_R_SUCCESS) {
		dispentry_cancel(entry, result);
	}

	dns_dispentry_detach(&entry); /* DISPENTRY007 */
	isc_nmhandle_detach(&handle);
}
/*
 * Continue reading on a TCP dispatch for 'resp'.
 *
 * The entry is put (back) on the active list if it was not reading.  If
 * the dispatch has no read outstanding, one is issued; a positive
 * 'timeout' is applied to the handle first.  The callers in this file
 * invoke this with disp->lock held.
 */
static void
tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp,
		     int32_t timeout) {
	REQUIRE(timeout <= INT16_MAX);

	dispentry_log(resp, LVL(90), "continue reading");

	if (!resp->reading) {
		ISC_LIST_APPEND(disp->active, resp, alink);
		resp->reading = true;
	}

	/* Only issue a new read when none is already outstanding. */
	if (!disp->reading) {
		if (timeout > 0) {
			isc_nmhandle_settimeout(disp->handle, timeout);
		}

		dns_dispatch_ref(disp); /* DISPATCH002 */
		isc_nm_read(disp->handle, tcp_recv, disp);
		disp->reading = true;
	}
}
/*
 * Continue reading on the UDP handle of 'resp'.
 *
 * Nothing is done when the entry is already reading.  Otherwise a positive
 * 'timeout' is applied to the handle, a reference on the entry is taken
 * for the read, and the read is issued with udp_recv() as its callback.
 */
static void
udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout) {
	REQUIRE(timeout <= INT16_MAX);

	if (!resp->reading) {
		if (timeout > 0) {
			isc_nmhandle_settimeout(resp->handle, timeout);
		}

		dispentry_log(resp, LVL(90), "continue reading");

		/* Reference taken on behalf of the outstanding read. */
		dns_dispentry_ref(resp); /* DISPENTRY003 */
		isc_nm_read(resp->handle, udp_recv, resp);
		resp->reading = true;
	}
}
/*
 * Resume reading for 'resp' with the given 'timeout'.
 *
 * For TCP the dispatch's 'timedout' counter, which must be positive, is
 * decremented before reading continues.
 */
void
dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout) {
	REQUIRE(VALID_RESPONSE(resp));
	REQUIRE(VALID_DISPATCH(resp->disp));

	dns_dispatch_t *owner = resp->disp;

	dispentry_log(resp, LVL(90), "resume");

	LOCK(&owner->lock);
	if (owner->socktype == isc_socktype_udp) {
		udp_dispatch_getnext(resp, timeout);
	} else if (owner->socktype == isc_socktype_tcp) {
		INSIST(owner->timedout > 0);
		owner->timedout--;
		tcp_dispatch_getnext(owner, resp, timeout);
	} else {
		UNREACHABLE();
	}
	UNLOCK(&owner->lock);
}
/*
 * Send the data in region 'r' for the response entry 'resp'.
 *
 * UDP entries send on their own handle, TCP entries on the handle shared
 * by the dispatch.  A reference on that handle and a reference on the
 * entry are taken for the duration of the send; send_done() is the
 * completion callback.
 */
void
dns_dispatch_send(dns_dispentry_t *resp, isc_region_t *r) {
	REQUIRE(VALID_RESPONSE(resp));
	REQUIRE(VALID_DISPATCH(resp->disp));

	dns_dispatch_t *owner = resp->disp;
	isc_nmhandle_t *source = NULL;
	isc_nmhandle_t *held = NULL;

	dispentry_log(resp, LVL(90), "sending");

	/* Pick the handle that carries traffic for this entry. */
	if (owner->socktype == isc_socktype_udp) {
		source = resp->handle;
	} else if (owner->socktype == isc_socktype_tcp) {
		source = owner->handle;
	} else {
		UNREACHABLE();
	}
	isc_nmhandle_attach(source, &held);

	dns_dispentry_ref(resp); /* DISPENTRY007 */
	isc_nm_send(held, r, send_done, resp);
}
/*
 * Copy the local address of a UDP dispatch into '*addrp'.
 *
 * Returns ISC_R_SUCCESS for UDP dispatches and ISC_R_NOTIMPLEMENTED for
 * every other socket type (in which case '*addrp' is not written).
 */
isc_result_t
dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
	REQUIRE(VALID_DISPATCH(disp));
	REQUIRE(addrp != NULL);

	if (disp->socktype != isc_socktype_udp) {
		return (ISC_R_NOTIMPLEMENTED);
	}

	*addrp = disp->local;
	return (ISC_R_SUCCESS);
}
/*
 * Copy the local address used by the response entry 'resp' into '*addrp'.
 *
 * For TCP this is the local address stored in the dispatch; for UDP it is
 * the local address of the entry's own handle.  Always returns
 * ISC_R_SUCCESS.
 */
isc_result_t
dns_dispentry_getlocaladdress(dns_dispentry_t *resp, isc_sockaddr_t *addrp) {
	REQUIRE(VALID_RESPONSE(resp));
	REQUIRE(VALID_DISPATCH(resp->disp));
	REQUIRE(addrp != NULL);

	dns_dispatch_t *owner = resp->disp;

	if (owner->socktype == isc_socktype_tcp) {
		*addrp = owner->local;
	} else if (owner->socktype == isc_socktype_udp) {
		*addrp = isc_nmhandle_localaddr(resp->handle);
	} else {
		UNREACHABLE();
	}

	return (ISC_R_SUCCESS);
}
/*
 * Return the next dispatch of 'dset' in round-robin order, or NULL when
 * 'dset' is NULL or contains no dispatches.  No reference is taken on the
 * returned dispatch.
 */
dns_dispatch_t *
dns_dispatchset_get(dns_dispatchset_t *dset) {
	dns_dispatch_t *chosen = NULL;

	/* Nothing to hand out without a configured set. */
	if (dset == NULL || dset->ndisp == 0) {
		return (NULL);
	}

	LOCK(&dset->lock);
	chosen = dset->dispatches[dset->cur];
	/* Advance the cursor, wrapping around at the end of the array. */
	dset->cur = (dset->cur + 1 == dset->ndisp) ? 0 : dset->cur + 1;
	UNLOCK(&dset->lock);

	return (chosen);
}
/*
 * Create a set of 'n' UDP dispatches based on 'source'.
 *
 * Slot 0 of the set holds a reference to 'source' itself; slots 1..n-1 are
 * new UDP dispatches created with the same local address as 'source'.
 *
 * Requires:
 *	'source' is a valid UDP dispatch.
 *	'dsetp' is non-NULL and '*dsetp' is NULL.
 *	'n' is greater than zero (slot 0 is always populated).
 *
 * Returns:
 *	ISC_R_SUCCESS	'*dsetp' is the new set
 *	other		failure from dispatch_createudp(); everything
 *			allocated so far has been released again
 */
isc_result_t
dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source,
		       dns_dispatchset_t **dsetp, int n) {
	isc_result_t result;
	dns_dispatchset_t *dset = NULL;
	dns_dispatchmgr_t *mgr = NULL;
	int i, j;

	REQUIRE(VALID_DISPATCH(source));
	REQUIRE(source->socktype == isc_socktype_udp);
	REQUIRE(dsetp != NULL && *dsetp == NULL);
	/*
	 * dispatches[0] is written unconditionally below, so a set with
	 * no slots would overrun the array allocation.
	 */
	REQUIRE(n > 0);

	mgr = source->mgr;

	dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
	*dset = (dns_dispatchset_t){ .ndisp = n };

	isc_mutex_init(&dset->lock);

	dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);

	isc_mem_attach(mctx, &dset->mctx);

	/* The source dispatch itself always occupies the first slot. */
	dset->dispatches[0] = NULL;
	dns_dispatch_attach(source, &dset->dispatches[0]); /* DISPATCH004 */

	LOCK(&mgr->lock);
	for (i = 1; i < n; i++) {
		dset->dispatches[i] = NULL;
		result = dispatch_createudp(mgr, &source->local,
					    &dset->dispatches[i]);
		if (result != ISC_R_SUCCESS) {
			goto fail;
		}
	}

	UNLOCK(&mgr->lock);
	*dsetp = dset;

	return (ISC_R_SUCCESS);

fail:
	UNLOCK(&mgr->lock);

	/* Slots 0..i-1 were populated; slot i is the one that failed. */
	for (j = 0; j < i; j++) {
		dns_dispatch_detach(&(dset->dispatches[j])); /* DISPATCH004 */
	}
	isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
	if (dset->mctx == mctx) {
		isc_mem_detach(&dset->mctx);
	}

	isc_mutex_destroy(&dset->lock);
	isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
	return (result);
}
/*
 * Destroy the dispatch set '*dsetp' and set '*dsetp' to NULL.
 *
 * The reference held on each dispatch in the set is released, then the
 * array, the lock and the set itself are freed and the set's memory
 * context is detached.
 */
void
dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
	dns_dispatchset_t *set = NULL;

	REQUIRE(dsetp != NULL && *dsetp != NULL);

	/* Take the pointer over from the caller before tearing down. */
	set = *dsetp;
	*dsetp = NULL;

	for (int idx = 0; idx < set->ndisp; idx++) {
		dns_dispatch_detach(&(set->dispatches[idx])); /* DISPATCH004 */
	}

	isc_mem_put(set->mctx, set->dispatches,
		    sizeof(dns_dispatch_t *) * set->ndisp);
	isc_mutex_destroy(&set->lock);
	isc_mem_putanddetach(&set->mctx, set, sizeof(dns_dispatchset_t));
}
/*
 * Run isc_nm_xfr_checkperm() on the handle of 'disp'.
 *
 * Returns ISC_R_NOPERM without consulting the network manager when the
 * dispatch has no handle or is a UDP dispatch; otherwise the result of
 * isc_nm_xfr_checkperm() is returned.
 */
isc_result_t
dns_dispatch_checkperm(dns_dispatch_t *disp) {
	REQUIRE(VALID_DISPATCH(disp));

	if (disp->handle != NULL && disp->socktype != isc_socktype_udp) {
		return (isc_nm_xfr_checkperm(disp->handle));
	}

	return (ISC_R_NOPERM);
}