mirror of
https://gitlab.isc.org/isc-projects/bind9
synced 2025-08-31 14:35:26 +00:00
Add stale-answer-client-timeout option
The general logic behind the addition of this new feature works as folows: When a client query arrives, the basic path (query.c / ns_query_recurse) was to create a fetch, waiting for completion in fetch_callback. With the introduction of stale-answer-client-timeout, a new event of type DNS_EVENT_TRYSTALE may invoke fetch_callback, whenever stale answers are enabled and the fetch took longer than stale-answer-client-timeout to complete. When an event of type DNS_EVENT_TRYSTALE triggers fetch_callback, we must ensure that the folowing happens: 1. Setup a new query context with the sole purpose of looking up for stale RRset only data, for that matters a new flag was added 'DNS_DBFIND_STALEONLY' used in database lookups. . If a stale RRset is found, mark the original client query as answered (with a new query attribute named NS_QUERYATTR_ANSWERED), so when the fetch completion event is received later, we avoid answering the client twice. . If a stale RRset is not found, cleanup and wait for the normal fetch completion event. 2. In ns_query_done, we must change this part: /* * If we're recursing then just return; the query will * resume when recursion ends. */ if (RECURSING(qctx->client)) { return (qctx->result); } To this: if (RECURSING(qctx->client) && !QUERY_STALEONLY(qctx->client)) { return (qctx->result); } Otherwise we would not proceed to answer the client if it happened that a stale answer was found when looking up for stale only data. When an event of type DNS_EVENT_FETCHDONE triggers fetch_callback, we proceed as before, resuming query, updating stats, etc, but a few exceptions had to be added, most important of which are two: 1. Before answering the client (ns_client_send), check if the query wasn't already answered before. 2. Before detaching a client, e.g. isc_nmhandle_detach(&client->reqhandle), ensure that this is the fetch completion event, and not the one triggered due to stale-answer-client-timeout, so a correct call would be: if (!QUERY_STALEONLY(client)) { isc_nmhandle_detach(&client->reqhandle); } Other than these notes, comments were added in code in attempt to make these updates easier to follow.
This commit is contained in:
136
lib/ns/query.c
136
lib/ns/query.c
@@ -133,6 +133,12 @@
|
||||
|
||||
#define REDIRECT(c) (((c)->query.attributes & NS_QUERYATTR_REDIRECT) != 0)
|
||||
|
||||
/*% Was the query already answered due to stale-answer-client-timeout? */
|
||||
#define QUERY_ANSWERED(c) (((c)->query.attributes & NS_QUERYATTR_ANSWERED) != 0)
|
||||
|
||||
/*% Does the query only wants to check for stale RRset? */
|
||||
#define QUERY_STALEONLY(c) (((c)->query.dboptions & DNS_DBFIND_STALEONLY) != 0)
|
||||
|
||||
/*% Does the rdataset 'r' have an attached 'No QNAME Proof'? */
|
||||
#define NOQNAME(r) (((r)->attributes & DNS_RDATASETATTR_NOQNAME) != 0)
|
||||
|
||||
@@ -557,8 +563,13 @@ query_send(ns_client_t *client) {
|
||||
}
|
||||
|
||||
inc_stats(client, counter);
|
||||
ns_client_send(client);
|
||||
isc_nmhandle_detach(&client->reqhandle);
|
||||
if (!QUERY_ANSWERED(client)) {
|
||||
ns_client_send(client);
|
||||
}
|
||||
|
||||
if (!QUERY_STALEONLY(client)) {
|
||||
isc_nmhandle_detach(&client->reqhandle);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -585,7 +596,10 @@ query_error(ns_client_t *client, isc_result_t result, int line) {
|
||||
log_queryerror(client, result, line, loglevel);
|
||||
|
||||
ns_client_error(client, result);
|
||||
isc_nmhandle_detach(&client->reqhandle);
|
||||
|
||||
if (!QUERY_STALEONLY(client)) {
|
||||
isc_nmhandle_detach(&client->reqhandle);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -598,7 +612,10 @@ query_next(ns_client_t *client, isc_result_t result) {
|
||||
inc_stats(client, ns_statscounter_failure);
|
||||
}
|
||||
ns_client_drop(client, result);
|
||||
isc_nmhandle_detach(&client->reqhandle);
|
||||
|
||||
if (!QUERY_STALEONLY(client)) {
|
||||
isc_nmhandle_detach(&client->reqhandle);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
@@ -5158,7 +5175,7 @@ qctx_freedata(query_ctx_t *qctx) {
|
||||
dns_db_detach(&qctx->zdb);
|
||||
}
|
||||
|
||||
if (qctx->event != NULL) {
|
||||
if (qctx->event != NULL && !QUERY_STALEONLY(qctx->client)) {
|
||||
free_devent(qctx->client, ISC_EVENT_PTR(&qctx->event),
|
||||
&qctx->event);
|
||||
}
|
||||
@@ -5583,6 +5600,7 @@ query_lookup(query_ctx_t *qctx) {
|
||||
unsigned int dboptions;
|
||||
dns_ttl_t stale_refresh = 0;
|
||||
bool dbfind_stale = false;
|
||||
bool stale_ok;
|
||||
|
||||
CCTRACE(ISC_LOG_DEBUG(3), "query_lookup");
|
||||
|
||||
@@ -5681,10 +5699,10 @@ query_lookup(query_ctx_t *qctx) {
|
||||
* answer, otherwise "fresh" answers are also treated as stale.
|
||||
*/
|
||||
dbfind_stale = ((dboptions & DNS_DBFIND_STALEOK) != 0);
|
||||
if (dbfind_stale != 0 ||
|
||||
(((dboptions & DNS_DBFIND_STALEENABLED) != 0) &&
|
||||
STALE(qctx->rdataset)))
|
||||
{
|
||||
stale_ok = ((dboptions &
|
||||
(DNS_DBFIND_STALEENABLED | DNS_DBFIND_STALEONLY)) != 0);
|
||||
|
||||
if (dbfind_stale != 0 || (stale_ok && STALE(qctx->rdataset))) {
|
||||
char namebuf[DNS_NAME_FORMATSIZE];
|
||||
bool success;
|
||||
|
||||
@@ -5709,6 +5727,12 @@ query_lookup(query_ctx_t *qctx) {
|
||||
"%s resolver failure, stale answer %s",
|
||||
namebuf,
|
||||
success ? "used" : "unavailable");
|
||||
} else if ((dboptions & DNS_DBFIND_STALEONLY) != 0) {
|
||||
isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
|
||||
NS_LOGMODULE_QUERY, ISC_LOG_INFO,
|
||||
"%s client timeout, stale answer %s",
|
||||
namebuf,
|
||||
success ? "used" : "unavailable");
|
||||
} else {
|
||||
isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
|
||||
NS_LOGMODULE_QUERY, ISC_LOG_INFO,
|
||||
@@ -5719,21 +5743,75 @@ query_lookup(query_ctx_t *qctx) {
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
QUERY_ERROR(qctx, DNS_R_SERVFAIL);
|
||||
return (ns_query_done(qctx));
|
||||
/*
|
||||
* If DNS_DBFIND_STALEONLY is set then it means
|
||||
* stale-answer-client-timeout was triggered, in
|
||||
* that case we only want to check if a stale RRset is
|
||||
* available, if that's the case we promptly answer the
|
||||
* client with the stale data found. If a stale RRset is
|
||||
* not available then we must wait for the original
|
||||
* query to be resumed in order to build a proper
|
||||
* answer.
|
||||
*/
|
||||
if ((dboptions & DNS_DBFIND_STALEONLY) == 0) {
|
||||
QUERY_ERROR(qctx, DNS_R_SERVFAIL);
|
||||
return (ns_query_done(qctx));
|
||||
}
|
||||
|
||||
return (result);
|
||||
}
|
||||
}
|
||||
return (query_gotanswer(qctx, result));
|
||||
|
||||
/*
|
||||
* If DNS_DBFIND_STALEONLY is disabled then we proceed as normal,
|
||||
* otherwise we only proceed with query_gotanswer if we
|
||||
* successfully found a stale RRset in cache.
|
||||
*/
|
||||
if (((dboptions & DNS_DBFIND_STALEONLY) == 0) ||
|
||||
result == ISC_R_SUCCESS || result == DNS_R_GLUE ||
|
||||
result == DNS_R_ZONECUT)
|
||||
{
|
||||
return (query_gotanswer(qctx, result));
|
||||
}
|
||||
|
||||
cleanup:
|
||||
return (result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Event handler to resume processing a query after recursion.
|
||||
* If the query has timed out or been canceled or the system
|
||||
* is shutting down, clean up and exit; otherwise, call
|
||||
* query_resume() to continue the ongoing work.
|
||||
* Create a new query context with the sole intent
|
||||
* of looking up for a stale RRset in cache.
|
||||
* If an entry is found, we mark the original query as
|
||||
* answered, in order to avoid answering the query twice,
|
||||
* when the original fetch finishes.
|
||||
*/
|
||||
static inline void
|
||||
query_lookup_staleonly(ns_client_t *client, dns_fetchevent_t *devent) {
|
||||
query_ctx_t qctx;
|
||||
isc_result_t result;
|
||||
|
||||
qctx_init(client, &devent, client->query.qtype, &qctx);
|
||||
dns_db_attach(client->view->cachedb, &qctx.db);
|
||||
client->query.dboptions |= DNS_DBFIND_STALEONLY;
|
||||
result = query_lookup(&qctx);
|
||||
if (result == ISC_R_SUCCESS) {
|
||||
client->query.attributes |= NS_QUERYATTR_ANSWERED;
|
||||
}
|
||||
if (qctx.node != NULL) {
|
||||
dns_db_detachnode(qctx.db, &qctx.node);
|
||||
}
|
||||
qctx_freedata(&qctx);
|
||||
client->query.dboptions &= ~DNS_DBFIND_STALEONLY;
|
||||
qctx_destroy(&qctx);
|
||||
isc_event_free(ISC_EVENT_PTR(&qctx.event));
|
||||
}
|
||||
|
||||
/*
|
||||
* Event handler to resume processing a query after recursion, or when a
|
||||
* client timeout is triggered. If the query has timed out or been cancelled
|
||||
* or the system is shutting down, clean up and exit. If a client timeout is
|
||||
* triggered, see if we can respond with a stale answer from cache. Otherwise,
|
||||
* call query_resume() to continue the ongoing work.
|
||||
*/
|
||||
static void
|
||||
fetch_callback(isc_task_t *task, isc_event_t *event) {
|
||||
@@ -5748,7 +5826,8 @@ fetch_callback(isc_task_t *task, isc_event_t *event) {
|
||||
|
||||
UNUSED(task);
|
||||
|
||||
REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
|
||||
REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE ||
|
||||
event->ev_type == DNS_EVENT_TRYSTALE);
|
||||
client = devent->ev_arg;
|
||||
REQUIRE(NS_CLIENT_VALID(client));
|
||||
REQUIRE(task == client->task);
|
||||
@@ -5756,6 +5835,11 @@ fetch_callback(isc_task_t *task, isc_event_t *event) {
|
||||
|
||||
CTRACE(ISC_LOG_DEBUG(3), "fetch_callback");
|
||||
|
||||
if (event->ev_type == DNS_EVENT_TRYSTALE) {
|
||||
query_lookup_staleonly(client, devent);
|
||||
return;
|
||||
}
|
||||
|
||||
LOCK(&client->query.fetchlock);
|
||||
if (client->query.fetch != NULL) {
|
||||
/*
|
||||
@@ -6076,6 +6160,10 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname,
|
||||
peeraddr = &client->peeraddr;
|
||||
}
|
||||
|
||||
if (dns_view_staleanswerenabled(client->view)) {
|
||||
client->query.fetchoptions |= DNS_FETCHOPT_TRYSTALE_ONTIMEOUT;
|
||||
}
|
||||
|
||||
isc_nmhandle_attach(client->handle, &client->fetchhandle);
|
||||
result = dns_resolver_createfetch(
|
||||
client->view->resolver, qname, qtype, qdomain, nameservers,
|
||||
@@ -7678,7 +7766,9 @@ query_addanswer(query_ctx_t *qctx) {
|
||||
query_filter64(qctx);
|
||||
ns_client_putrdataset(qctx->client, &qctx->rdataset);
|
||||
} else {
|
||||
if (!qctx->is_zone && RECURSIONOK(qctx->client)) {
|
||||
if (!qctx->is_zone && RECURSIONOK(qctx->client) &&
|
||||
!QUERY_STALEONLY(qctx->client))
|
||||
{
|
||||
query_prefetch(qctx->client, qctx->fname,
|
||||
qctx->rdataset);
|
||||
}
|
||||
@@ -7786,7 +7876,7 @@ query_respond(query_ctx_t *qctx) {
|
||||
* We shouldn't ever fail to add 'rdataset'
|
||||
* because it's already in the answer.
|
||||
*/
|
||||
INSIST(qctx->rdataset == NULL);
|
||||
INSIST(qctx->rdataset == NULL || QUERY_ANSWERED(qctx->client));
|
||||
|
||||
query_addauth(qctx);
|
||||
|
||||
@@ -11271,7 +11361,7 @@ ns_query_done(query_ctx_t *qctx) {
|
||||
* If we're recursing then just return; the query will
|
||||
* resume when recursion ends.
|
||||
*/
|
||||
if (RECURSING(qctx->client)) {
|
||||
if (RECURSING(qctx->client) && !QUERY_STALEONLY(qctx->client)) {
|
||||
return (qctx->result);
|
||||
}
|
||||
|
||||
@@ -11282,8 +11372,10 @@ ns_query_done(query_ctx_t *qctx) {
|
||||
* to the AA bit if the auth-nxdomain config option
|
||||
* says so, then render and send the response.
|
||||
*/
|
||||
query_setup_sortlist(qctx);
|
||||
query_glueanswer(qctx);
|
||||
if (!QUERY_ANSWERED(qctx->client)) {
|
||||
query_setup_sortlist(qctx);
|
||||
query_glueanswer(qctx);
|
||||
}
|
||||
|
||||
if (qctx->client->message->rcode == dns_rcode_nxdomain &&
|
||||
qctx->view->auth_nxdomain)
|
||||
|
Reference in New Issue
Block a user