2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-22 01:59:26 +00:00

Add stale-answer-client-timeout option

The general logic behind the addition of this new feature works as
follows:

When a client query arrives, the basic path (query.c / ns_query_recurse)
was to create a fetch, waiting for completion in fetch_callback.

With the introduction of stale-answer-client-timeout, a new event of
type DNS_EVENT_TRYSTALE may invoke fetch_callback, whenever stale
answers are enabled and the fetch took longer than
stale-answer-client-timeout to complete.

When an event of type DNS_EVENT_TRYSTALE triggers fetch_callback, we
must ensure that the following happens:

1. Set up a new query context with the sole purpose of looking up
   stale RRset data only; for that purpose a new flag,
   'DNS_DBFIND_STALEONLY', was added for use in database lookups.

    . If a stale RRset is found, mark the original client query as
      answered (with a new query attribute named NS_QUERYATTR_ANSWERED),
      so when the fetch completion event is received later, we avoid
      answering the client twice.

    . If a stale RRset is not found, cleanup and wait for the normal
      fetch completion event.

2. In ns_query_done, we must change this part:
	/*
	 * If we're recursing then just return; the query will
	 * resume when recursion ends.
	 */
	if (RECURSING(qctx->client)) {
		return (qctx->result);
	}

   To this:

	if (RECURSING(qctx->client) && !QUERY_STALEONLY(qctx->client)) {
		return (qctx->result);
	}

   Otherwise we would not proceed to answer the client when a stale
   answer was found while looking up stale-only data.

When an event of type DNS_EVENT_FETCHDONE triggers fetch_callback, we
proceed as before, resuming query, updating stats, etc, but a few
exceptions had to be added, most important of which are two:

1. Before answering the client (ns_client_send), check if the query
   wasn't already answered before.

2. Before detaching a client, e.g.
   isc_nmhandle_detach(&client->reqhandle), ensure that this is the
   fetch completion event, and not the one triggered due to
   stale-answer-client-timeout, so a correct call would be:
   if (!QUERY_STALEONLY(client)) {
        isc_nmhandle_detach(&client->reqhandle);
   }

Other than these notes, comments were added in the code in an attempt to
make these updates easier to follow.
This commit is contained in:
Diego Fronza 2020-12-11 14:10:31 -03:00
parent 74840ec50b
commit 171a5b7542
12 changed files with 374 additions and 31 deletions

View File

@ -194,9 +194,10 @@ options {\n\
servfail-ttl 1;\n\
# sortlist <none>\n\
stale-answer-enable false;\n\
stale-refresh-time 30; /* 30 seconds */\n\
stale-answer-client-timeout 1800; /* in milliseconds */\n\
stale-answer-ttl 30; /* 30 seconds */\n\
stale-cache-enable false;\n\
stale-refresh-time 30; /* 30 seconds */\n\
synth-from-dnssec no;\n\
# topology <none>\n\
transfer-format many-answers;\n\

View File

@ -4485,6 +4485,11 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
view->staleanswersok = dns_stale_answer_conf;
}
obj = NULL;
result = named_config_get(maps, "stale-answer-client-timeout", &obj);
INSIST(result == ISC_R_SUCCESS);
view->staleanswerclienttimeout = cfg_obj_asuint32(obj);
obj = NULL;
result = named_config_get(maps, "stale-refresh-time", &obj);
INSIST(result == ISC_R_SUCCESS);

View File

@ -239,8 +239,35 @@ struct dns_dbonupdatelistener {
#define DNS_DBFIND_FORCENSEC3 0x0080
#define DNS_DBFIND_ADDITIONALOK 0x0100
#define DNS_DBFIND_NOZONECUT 0x0200
#define DNS_DBFIND_STALEOK 0x0400
/*
* DNS_DBFIND_STALEOK: This flag is set when BIND fails to refresh a
* RRset due to timeout (resolver-query-timeout), its intent is to
* try to look for stale data in cache as a fallback, but only if
* stale answers are enabled in configuration.
*
* This flag is also used to activate stale-refresh-time window, since it
* is the only way the database knows that a resolution has failed.
*/
#define DNS_DBFIND_STALEOK 0x0400
/*
* DNS_DBFIND_STALEENABLED: This flag is used as a hint to the database
* that it may use stale data. It is always set during query lookup if
* stale answers are enabled, but only effectively used during
* stale-refresh-time window. Also during this window, the resolver will
* not try to resolve the query, in other words no attempt to refresh the
* data in cache is made when the stale-refresh-time window is active.
*/
#define DNS_DBFIND_STALEENABLED 0x0800
/*
* DNS_DBFIND_STALEONLY: This newly introduced flag is used when we want
* stale data from the database, but not due to a failure in resolution,
* it also doesn't require stale-refresh-time window timer to be active.
* As long as there is a stale RRset available, it should be returned.
*/
#define DNS_DBFIND_STALEONLY 0x1000
/*@}*/
/*@{*/

View File

@ -78,6 +78,7 @@
#define DNS_EVENT_CATZDELZONE (ISC_EVENTCLASS_DNS + 56)
#define DNS_EVENT_RPZUPDATED (ISC_EVENTCLASS_DNS + 57)
#define DNS_EVENT_STARTUPDATE (ISC_EVENTCLASS_DNS + 58)
#define DNS_EVENT_TRYSTALE (ISC_EVENTCLASS_DNS + 59)
#define DNS_EVENT_FIRSTEVENT (ISC_EVENTCLASS_DNS + 0)
#define DNS_EVENT_LASTEVENT (ISC_EVENTCLASS_DNS + 65535)

View File

@ -129,9 +129,10 @@ typedef enum { dns_quotatype_zone = 0, dns_quotatype_server } dns_quotatype_t;
* if possible. */
/* Reserved in use by adb.c 0x00400000 */
#define DNS_FETCHOPT_EDNSVERSIONSET 0x00800000
#define DNS_FETCHOPT_EDNSVERSIONMASK 0xff000000
#define DNS_FETCHOPT_EDNSVERSIONSHIFT 24
#define DNS_FETCHOPT_EDNSVERSIONSET 0x00800000
#define DNS_FETCHOPT_EDNSVERSIONMASK 0xff000000
#define DNS_FETCHOPT_EDNSVERSIONSHIFT 24
#define DNS_FETCHOPT_TRYSTALE_ONTIMEOUT 0x01000000
/*
* Upper bounds of class of query RTT (ms). Corresponds to

View File

@ -173,6 +173,7 @@ struct dns_view {
dns_stale_answer_t staleanswersok; /* rndc setting */
bool staleanswersenable; /* named.conf setting
* */
uint32_t staleanswerclienttimeout;
uint16_t nocookieudp;
uint16_t padding;
dns_acl_t * pad_acl;

View File

@ -4562,6 +4562,13 @@ check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header,
*/
if ((search->options & DNS_DBFIND_STALEOK) != 0) {
header->last_refresh_fail_ts = search->now;
} else if ((search->options & DNS_DBFIND_STALEONLY) !=
0) {
/*
* We want stale RRset only, so we don't skip
* it.
*/
return (false);
} else if ((search->options &
DNS_DBFIND_STALEENABLED) != 0 &&
search->now <

View File

@ -307,7 +307,9 @@ struct fetchctx {
dns_rdataset_t nameservers;
atomic_uint_fast32_t attributes;
isc_timer_t *timer;
isc_timer_t *timer_try_stale;
isc_time_t expires;
isc_time_t expires_try_stale;
isc_interval_t interval;
dns_message_t *qmessage;
ISC_LIST(resquery_t) queries;
@ -1135,6 +1137,16 @@ fctx_starttimer(fetchctx_t *fctx) {
NULL, true));
}
/*
 * Arm the stale-answer-client-timeout timer of 'fctx'.
 *
 * The timer is reset as a one-shot timer that expires at
 * fctx->expires_try_stale. The result of isc_timer_reset() is handed
 * back to the caller unchanged.
 *
 * fctx->timer_try_stale is passed to isc_timer_reset() without a NULL
 * check here; fctx_start() only calls this function when the timer is
 * non-NULL.
 */
static inline isc_result_t
fctx_starttimer_trystale(fetchctx_t *fctx) {
	isc_result_t result;

	result = isc_timer_reset(fctx->timer_try_stale, isc_timertype_once,
				 &fctx->expires_try_stale, NULL, true);

	return (result);
}
static inline void
fctx_stoptimer(fetchctx_t *fctx) {
isc_result_t result;
@ -1153,6 +1165,22 @@ fctx_stoptimer(fetchctx_t *fctx) {
}
}
/*
 * Deactivate the stale-answer-client-timeout timer of 'fctx', if one
 * was created for this fetch context.
 *
 * A failure to reset the timer is reported through UNEXPECTED_ERROR()
 * and otherwise ignored.
 */
static inline void
fctx_stoptimer_trystale(fetchctx_t *fctx) {
	isc_result_t result;

	/* No try-stale timer exists for this fetch: nothing to stop. */
	if (fctx->timer_try_stale == NULL) {
		return;
	}

	result = isc_timer_reset(fctx->timer_try_stale, isc_timertype_inactive,
				 NULL, NULL, true);
	if (result == ISC_R_SUCCESS) {
		return;
	}

	UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_timer_reset(): %s",
			 isc_result_totext(result));
}
static inline isc_result_t
fctx_startidletimer(fetchctx_t *fctx, isc_interval_t *interval) {
/*
@ -1537,6 +1565,7 @@ fctx_stopqueries(fetchctx_t *fctx, bool no_response, bool age_untried) {
FCTXTRACE("stopqueries");
fctx_cancelqueries(fctx, no_response, age_untried);
fctx_stoptimer(fctx);
fctx_stoptimer_trystale(fctx);
}
static inline void
@ -1697,6 +1726,16 @@ fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) {
event = next_event) {
next_event = ISC_LIST_NEXT(event, ev_link);
ISC_LIST_UNLINK(fctx->events, event, ev_link);
if (event->ev_type == DNS_EVENT_TRYSTALE) {
/*
* Not applicable to TRY STALE events, this function is
* called when the fetch has either completed or timed
* out due to resolver-query-timeout being reached.
*/
isc_task_detach((isc_task_t **)&event->ev_sender);
isc_event_free((isc_event_t **)&event);
continue;
}
task = event->ev_sender;
event->ev_sender = fctx;
event->vresult = fctx->vresult;
@ -4182,7 +4221,13 @@ fctx_try(fetchctx_t *fctx, bool retrying, bool badcache) {
*/
if (fctx->minimized && !fctx->forwarding) {
unsigned int options = fctx->options;
options &= ~DNS_FETCHOPT_QMINIMIZE;
/*
* Also clear DNS_FETCHOPT_TRYSTALE_ONTIMEOUT here, otherwise
* every query minimization step will activate the try-stale
* timer again.
*/
options &= ~(DNS_FETCHOPT_QMINIMIZE |
DNS_FETCHOPT_TRYSTALE_ONTIMEOUT);
/*
* Is another QNAME minimization fetch still running?
@ -4222,6 +4267,7 @@ fctx_try(fetchctx_t *fctx, bool retrying, bool badcache) {
fctx_increference(fctx);
task = res->buckets[bucketnum].task;
fctx_stoptimer(fctx);
fctx_stoptimer_trystale(fctx);
result = dns_resolver_createfetch(
fctx->res, &fctx->qminname, fctx->qmintype,
&fctx->domain, &fctx->nameservers, NULL, NULL, 0,
@ -4247,6 +4293,7 @@ fctx_try(fetchctx_t *fctx, bool retrying, bool badcache) {
}
fctx_increference(fctx);
result = fctx_query(fctx, addrinfo, fctx->options);
if (result != ISC_R_SUCCESS) {
fctx_done(fctx, result, __LINE__);
@ -4504,6 +4551,9 @@ fctx_destroy(fetchctx_t *fctx) {
isc_counter_detach(&fctx->qc);
fcount_decr(fctx);
isc_timer_detach(&fctx->timer);
if (fctx->timer_try_stale != NULL) {
isc_timer_detach(&fctx->timer_try_stale);
}
dns_message_detach(&fctx->qmessage);
if (dns_name_countlabels(&fctx->domain) > 0) {
dns_name_free(&fctx->domain, fctx->mctx);
@ -4579,6 +4629,57 @@ fctx_timeout(isc_task_t *task, isc_event_t *event) {
isc_event_free(&event);
}
/*
 * Timer handler for the stale-answer-client-timeout timer. It runs when
 * stale answers are enabled (stale-answer-enable) and the fetch took
 * longer than stale-answer-client-timeout to complete.
 *
 * On an ISC_TIMEREVENT_LIFE event, every DNS_EVENT_TRYSTALE event queued
 * on the fetch context is unlinked from fctx->events and sent to the task
 * that was stored in its ev_sender field, with 'result' and 'vresult'
 * both set to ISC_R_TIMEDOUT. Events of any other type are left on the
 * list untouched.
 *
 * The timer event itself is freed on every path, including the one where
 * its type is not ISC_TIMEREVENT_LIFE, so that it is never leaked.
 */
static void
fctx_timeout_try_stale(isc_task_t *task, isc_event_t *event) {
	fetchctx_t *fctx = event->ev_arg;
	dns_fetchevent_t *dns_event, *next_event;
	isc_task_t *sender_task;

	REQUIRE(VALID_FCTX(fctx));

	UNUSED(task);

	FCTXTRACE("timeout_try_stale");

	if (event->ev_type == ISC_TIMEREVENT_LIFE) {
		LOCK(&fctx->res->buckets[fctx->bucketnum].lock);

		/*
		 * Trigger events of type DNS_EVENT_TRYSTALE.
		 */
		for (dns_event = ISC_LIST_HEAD(fctx->events);
		     dns_event != NULL; dns_event = next_event)
		{
			/*
			 * Fetch the successor first: the current event may
			 * be unlinked from the list below.
			 */
			next_event = ISC_LIST_NEXT(dns_event, ev_link);

			if (dns_event->ev_type != DNS_EVENT_TRYSTALE) {
				continue;
			}

			ISC_LIST_UNLINK(fctx->events, dns_event, ev_link);

			/*
			 * ev_sender holds the destination task until the
			 * event is sent; the fetch context becomes the
			 * sender at delivery time.
			 */
			sender_task = dns_event->ev_sender;
			dns_event->ev_sender = fctx;
			dns_event->vresult = ISC_R_TIMEDOUT;
			dns_event->result = ISC_R_TIMEDOUT;

			isc_task_sendanddetach(&sender_task,
					       ISC_EVENT_PTR(&dns_event));
		}

		UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
	}

	isc_event_free(&event);
}
static void
fctx_shutdown(fetchctx_t *fctx) {
isc_event_t *cevent;
@ -4760,6 +4861,9 @@ fctx_start(isc_task_t *task, isc_event_t *event) {
* All is well. Start working on the fetch.
*/
result = fctx_starttimer(fctx);
if (result == ISC_R_SUCCESS && fctx->timer_try_stale != NULL) {
result = fctx_starttimer_trystale(fctx);
}
if (result != ISC_R_SUCCESS) {
fctx_done(fctx, result, __LINE__);
} else {
@ -4826,6 +4930,34 @@ fctx_join(fetchctx_t *fctx, isc_task_t *task, const isc_sockaddr_t *client,
return (ISC_R_SUCCESS);
}
/*
 * Allocate a fetch event of type 'event_type' for 'fetch' and append it
 * to the event list of 'fctx'.
 *
 * The event is allocated from fctx->res->mctx with 'action' and 'arg' as
 * its callback and callback argument. Its result is preset to
 * DNS_R_SERVFAIL, its qtype to the type of the fetch context, and its
 * db, node, rdataset and sigrdataset pointers to NULL. 'client' and 'id'
 * are recorded in the event as given.
 *
 * NOTE(review): no lock is taken here although fctx->events is walked
 * under the bucket lock elsewhere in this file; presumably the caller
 * already holds that lock - confirm.
 */
static inline void
fctx_add_event(fetchctx_t *fctx, isc_task_t *task, const isc_sockaddr_t *client,
	       dns_messageid_t id, isc_taskaction_t action, void *arg,
	       dns_fetch_t *fetch, isc_eventtype_t event_type) {
	isc_task_t *receiver = NULL;
	dns_fetchevent_t *fevent;

	/*
	 * The task that will eventually receive this event is kept in the
	 * event's sender field for now; the fetch context is substituted as
	 * the sender at the moment the event is actually sent.
	 */
	isc_task_attach(task, &receiver);
	fevent = (dns_fetchevent_t *)isc_event_allocate(
		fctx->res->mctx, receiver, event_type, action, arg,
		sizeof(*fevent));

	/* Who asked, and for what. */
	fevent->fetch = fetch;
	fevent->client = client;
	fevent->id = id;
	fevent->qtype = fctx->type;

	/* No answer data yet; a failure result is the default. */
	fevent->result = DNS_R_SERVFAIL;
	fevent->db = NULL;
	fevent->node = NULL;
	fevent->rdataset = NULL;
	fevent->sigrdataset = NULL;

	ISC_LIST_APPEND(fctx->events, fevent, ev_link);
}
static inline void
log_ns_ttl(fetchctx_t *fctx, const char *where) {
char namebuf[DNS_NAME_FORMATSIZE];
@ -4854,6 +4986,7 @@ fctx_create(dns_resolver_t *res, const dns_name_t *name, dns_rdatatype_t type,
char typebuf[DNS_RDATATYPE_FORMATSIZE];
isc_mem_t *mctx;
size_t p;
bool try_stale;
/*
* Caller must be holding the lock for bucket number 'bucketnum'.
@ -5078,6 +5211,29 @@ fctx_create(dns_resolver_t *res, const dns_name_t *name, dns_rdatatype_t type,
goto cleanup_qmessage;
}
try_stale = ((options & DNS_FETCHOPT_TRYSTALE_ONTIMEOUT) != 0);
if (try_stale) {
INSIST(res->view->staleanswerclienttimeout <=
(res->query_timeout - 1000));
/*
* Compute an expiration time after which we will attempt to
* serve stale data, if stale answers are enabled and the
* target RRset is available in cache.
*/
isc_interval_set(
&interval, res->view->staleanswerclienttimeout / 1000,
res->view->staleanswerclienttimeout % 1000 * 1000000);
iresult = isc_time_nowplusinterval(&fctx->expires_try_stale,
&interval);
if (iresult != ISC_R_SUCCESS) {
UNEXPECTED_ERROR(__FILE__, __LINE__,
"isc_time_nowplusinterval: %s",
isc_result_totext(iresult));
result = ISC_R_UNEXPECTED;
goto cleanup_qmessage;
}
}
/*
* Default retry interval initialization. We set the interval now
* mostly so it won't be uninitialized. It will be set to the
@ -5086,10 +5242,11 @@ fctx_create(dns_resolver_t *res, const dns_name_t *name, dns_rdatatype_t type,
isc_interval_set(&fctx->interval, 2, 0);
/*
* Create an inactive timer. It will be made active when the fetch
* is actually started.
* Create an inactive timer for resolver-query-timeout. It
* will be made active when the fetch is actually started.
*/
fctx->timer = NULL;
iresult = isc_timer_create(res->timermgr, isc_timertype_inactive, NULL,
NULL, res->buckets[bucketnum].task,
fctx_timeout, fctx, &fctx->timer);
@ -5100,6 +5257,26 @@ fctx_create(dns_resolver_t *res, const dns_name_t *name, dns_rdatatype_t type,
goto cleanup_qmessage;
}
/*
* If stale answers are enabled, then create an inactive timer
* for stale-answer-client-timeout. It will be made active when
* the fetch is actually started.
*/
fctx->timer_try_stale = NULL;
if (try_stale) {
iresult = isc_timer_create(
res->timermgr, isc_timertype_inactive, NULL, NULL,
res->buckets[bucketnum].task, fctx_timeout_try_stale,
fctx, &fctx->timer_try_stale);
if (iresult != ISC_R_SUCCESS) {
UNEXPECTED_ERROR(__FILE__, __LINE__,
"isc_timer_create: %s",
isc_result_totext(iresult));
result = ISC_R_UNEXPECTED;
goto cleanup_qmessage;
}
}
/*
* Attach to the view's cache and adb.
*/
@ -5144,6 +5321,7 @@ cleanup_mctx:
dns_adb_detach(&fctx->adb);
dns_db_detach(&fctx->cache);
isc_timer_detach(&fctx->timer);
isc_timer_detach(&fctx->timer_try_stale);
cleanup_qmessage:
dns_message_detach(&fctx->qmessage);
@ -5357,6 +5535,23 @@ clone_results(fetchctx_t *fctx) {
for (event = ISC_LIST_NEXT(hevent, ev_link); event != NULL;
event = ISC_LIST_NEXT(event, ev_link))
{
if (event->ev_type == DNS_EVENT_TRYSTALE) {
/*
* We don't need to clone resulting data to this
* type of event, as its associated callback is only
* called when stale-answer-client-timeout triggers,
* and the logic in there doesn't expect any result
* as input, as it will itself lookup for stale data
* in cache to use as result, if any is available.
*
* Also, if we reached this point, then the whole fetch
* context is done, it will cancel timers, process
* associated callbacks of type DNS_EVENT_FETCHDONE, and
* silently remove/free events of type
* DNS_EVENT_TRYSTALE.
*/
continue;
}
name = dns_fixedname_name(&event->foundname);
dns_name_copynf(hname, name);
event->result = hevent->result;
@ -6126,6 +6321,7 @@ cache_name(fetchctx_t *fctx, dns_name_t *name, dns_message_t *message,
(!need_validation)) {
have_answer = true;
event = ISC_LIST_HEAD(fctx->events);
if (event != NULL) {
adbp = &event->db;
aname = dns_fixedname_name(&event->foundname);
@ -10815,6 +11011,14 @@ dns_resolver_createfetch(dns_resolver_t *res, const dns_name_t *name,
result = fctx_join(fctx, task, client, id, action, arg, rdataset,
sigrdataset, fetch);
if (result == ISC_R_SUCCESS &&
((options & DNS_FETCHOPT_TRYSTALE_ONTIMEOUT) != 0))
{
fctx_add_event(fctx, task, client, id, action, arg, fetch,
DNS_EVENT_TRYSTALE);
}
if (new_fctx) {
if (result == ISC_R_SUCCESS) {
/*

View File

@ -2027,6 +2027,7 @@ static cfg_clausedef_t view_clauses[] = {
{ "servfail-ttl", &cfg_type_duration, 0 },
{ "sortlist", &cfg_type_bracketed_aml, 0 },
{ "stale-answer-enable", &cfg_type_boolean, 0 },
{ "stale-answer-client-timeout", &cfg_type_uint32, 0 },
{ "stale-answer-ttl", &cfg_type_duration, 0 },
{ "stale-cache-enable", &cfg_type_boolean, 0 },
{ "stale-refresh-time", &cfg_type_duration, 0 },

View File

@ -879,7 +879,9 @@ ns_client_error(ns_client_t *client, isc_result_t result) {
}
}
ns_client_send(client);
if ((client->query.attributes & NS_QUERYATTR_ANSWERED) == 0) {
ns_client_send(client);
}
}
isc_result_t

View File

@ -117,6 +117,7 @@ struct ns_query {
#define NS_QUERYATTR_DNS64EXCLUDE 0x08000
#define NS_QUERYATTR_RRL_CHECKED 0x10000
#define NS_QUERYATTR_REDIRECT 0x20000
#define NS_QUERYATTR_ANSWERED 0x40000
typedef struct query_ctx query_ctx_t;

View File

@ -133,6 +133,12 @@
#define REDIRECT(c) (((c)->query.attributes & NS_QUERYATTR_REDIRECT) != 0)
/*% Was the query already answered due to stale-answer-client-timeout? */
#define QUERY_ANSWERED(c) (((c)->query.attributes & NS_QUERYATTR_ANSWERED) != 0)
/*% Does the query only want to check for a stale RRset? */
#define QUERY_STALEONLY(c) (((c)->query.dboptions & DNS_DBFIND_STALEONLY) != 0)
/*% Does the rdataset 'r' have an attached 'No QNAME Proof'? */
#define NOQNAME(r) (((r)->attributes & DNS_RDATASETATTR_NOQNAME) != 0)
@ -557,8 +563,13 @@ query_send(ns_client_t *client) {
}
inc_stats(client, counter);
ns_client_send(client);
isc_nmhandle_detach(&client->reqhandle);
if (!QUERY_ANSWERED(client)) {
ns_client_send(client);
}
if (!QUERY_STALEONLY(client)) {
isc_nmhandle_detach(&client->reqhandle);
}
}
static void
@ -585,7 +596,10 @@ query_error(ns_client_t *client, isc_result_t result, int line) {
log_queryerror(client, result, line, loglevel);
ns_client_error(client, result);
isc_nmhandle_detach(&client->reqhandle);
if (!QUERY_STALEONLY(client)) {
isc_nmhandle_detach(&client->reqhandle);
}
}
static void
@ -598,7 +612,10 @@ query_next(ns_client_t *client, isc_result_t result) {
inc_stats(client, ns_statscounter_failure);
}
ns_client_drop(client, result);
isc_nmhandle_detach(&client->reqhandle);
if (!QUERY_STALEONLY(client)) {
isc_nmhandle_detach(&client->reqhandle);
}
}
static inline void
@ -5158,7 +5175,7 @@ qctx_freedata(query_ctx_t *qctx) {
dns_db_detach(&qctx->zdb);
}
if (qctx->event != NULL) {
if (qctx->event != NULL && !QUERY_STALEONLY(qctx->client)) {
free_devent(qctx->client, ISC_EVENT_PTR(&qctx->event),
&qctx->event);
}
@ -5583,6 +5600,7 @@ query_lookup(query_ctx_t *qctx) {
unsigned int dboptions;
dns_ttl_t stale_refresh = 0;
bool dbfind_stale = false;
bool stale_ok;
CCTRACE(ISC_LOG_DEBUG(3), "query_lookup");
@ -5681,10 +5699,10 @@ query_lookup(query_ctx_t *qctx) {
* answer, otherwise "fresh" answers are also treated as stale.
*/
dbfind_stale = ((dboptions & DNS_DBFIND_STALEOK) != 0);
if (dbfind_stale != 0 ||
(((dboptions & DNS_DBFIND_STALEENABLED) != 0) &&
STALE(qctx->rdataset)))
{
stale_ok = ((dboptions &
(DNS_DBFIND_STALEENABLED | DNS_DBFIND_STALEONLY)) != 0);
if (dbfind_stale != 0 || (stale_ok && STALE(qctx->rdataset))) {
char namebuf[DNS_NAME_FORMATSIZE];
bool success;
@ -5709,6 +5727,12 @@ query_lookup(query_ctx_t *qctx) {
"%s resolver failure, stale answer %s",
namebuf,
success ? "used" : "unavailable");
} else if ((dboptions & DNS_DBFIND_STALEONLY) != 0) {
isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
NS_LOGMODULE_QUERY, ISC_LOG_INFO,
"%s client timeout, stale answer %s",
namebuf,
success ? "used" : "unavailable");
} else {
isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
NS_LOGMODULE_QUERY, ISC_LOG_INFO,
@ -5719,21 +5743,75 @@ query_lookup(query_ctx_t *qctx) {
}
if (!success) {
QUERY_ERROR(qctx, DNS_R_SERVFAIL);
return (ns_query_done(qctx));
/*
* If DNS_DBFIND_STALEONLY is set then it means
* stale-answer-client-timeout was triggered, in
* that case we only want to check if a stale RRset is
* available, if that's the case we promptly answer the
* client with the stale data found. If a stale RRset is
* not available then we must wait for the original
* query to be resumed in order to build a proper
* answer.
*/
if ((dboptions & DNS_DBFIND_STALEONLY) == 0) {
QUERY_ERROR(qctx, DNS_R_SERVFAIL);
return (ns_query_done(qctx));
}
return (result);
}
}
return (query_gotanswer(qctx, result));
/*
* If DNS_DBFIND_STALEONLY is disabled then we proceed as normal,
* otherwise we only proceed with query_gotanswer if we
* successfully found a stale RRset in cache.
*/
if (((dboptions & DNS_DBFIND_STALEONLY) == 0) ||
result == ISC_R_SUCCESS || result == DNS_R_GLUE ||
result == DNS_R_ZONECUT)
{
return (query_gotanswer(qctx, result));
}
cleanup:
return (result);
}
/*
* Event handler to resume processing a query after recursion.
* If the query has timed out or been canceled or the system
* is shutting down, clean up and exit; otherwise, call
* query_resume() to continue the ongoing work.
* Create a new query context with the sole intent
* of looking up for a stale RRset in cache.
* If an entry is found, we mark the original query as
* answered, in order to avoid answering the query twice,
* when the original fetch finishes.
*/
static inline void
query_lookup_staleonly(ns_client_t *client, dns_fetchevent_t *devent) {
query_ctx_t qctx;
isc_result_t result;
/*
 * Build a temporary query context for the client's query type.
 * NOTE(review): qctx_init() presumably takes over 'devent' as
 * qctx.event (it is freed through qctx.event below) - confirm.
 */
qctx_init(client, &devent, client->query.qtype, &qctx);
/* The lookup is done against the view's cache database. */
dns_db_attach(client->view->cachedb, &qctx.db);
/*
 * Mark the client as doing a stale-only lookup. While this flag is
 * set QUERY_STALEONLY(client) is true, which lets ns_query_done()
 * proceed even though the client is still recursing, keeps the
 * request handle attached in query_send()/query_error()/query_next(),
 * and stops qctx_freedata() from freeing the event.
 */
client->query.dboptions |= DNS_DBFIND_STALEONLY;
result = query_lookup(&qctx);
if (result == ISC_R_SUCCESS) {
/*
 * Record that the client was answered, so that nothing is sent
 * a second time when the fetch completion event arrives.
 */
client->query.attributes |= NS_QUERYATTR_ANSWERED;
}
if (qctx.node != NULL) {
dns_db_detachnode(qctx.db, &qctx.node);
}
/*
 * Must run while DNS_DBFIND_STALEONLY is still set: in that state
 * qctx_freedata() leaves qctx.event alone.
 */
qctx_freedata(&qctx);
/* Stale-only lookup is over; restore the client's normal db options. */
client->query.dboptions &= ~DNS_DBFIND_STALEONLY;
qctx_destroy(&qctx);
/* The try-stale event was skipped by qctx_freedata(); free it here. */
isc_event_free(ISC_EVENT_PTR(&qctx.event));
}
/*
* Event handler to resume processing a query after recursion, or when a
* client timeout is triggered. If the query has timed out or been cancelled
* or the system is shutting down, clean up and exit. If a client timeout is
* triggered, see if we can respond with a stale answer from cache. Otherwise,
* call query_resume() to continue the ongoing work.
*/
static void
fetch_callback(isc_task_t *task, isc_event_t *event) {
@ -5748,7 +5826,8 @@ fetch_callback(isc_task_t *task, isc_event_t *event) {
UNUSED(task);
REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE ||
event->ev_type == DNS_EVENT_TRYSTALE);
client = devent->ev_arg;
REQUIRE(NS_CLIENT_VALID(client));
REQUIRE(task == client->task);
@ -5756,6 +5835,11 @@ fetch_callback(isc_task_t *task, isc_event_t *event) {
CTRACE(ISC_LOG_DEBUG(3), "fetch_callback");
if (event->ev_type == DNS_EVENT_TRYSTALE) {
query_lookup_staleonly(client, devent);
return;
}
LOCK(&client->query.fetchlock);
if (client->query.fetch != NULL) {
/*
@ -6076,6 +6160,10 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname,
peeraddr = &client->peeraddr;
}
if (dns_view_staleanswerenabled(client->view)) {
client->query.fetchoptions |= DNS_FETCHOPT_TRYSTALE_ONTIMEOUT;
}
isc_nmhandle_attach(client->handle, &client->fetchhandle);
result = dns_resolver_createfetch(
client->view->resolver, qname, qtype, qdomain, nameservers,
@ -7678,7 +7766,9 @@ query_addanswer(query_ctx_t *qctx) {
query_filter64(qctx);
ns_client_putrdataset(qctx->client, &qctx->rdataset);
} else {
if (!qctx->is_zone && RECURSIONOK(qctx->client)) {
if (!qctx->is_zone && RECURSIONOK(qctx->client) &&
!QUERY_STALEONLY(qctx->client))
{
query_prefetch(qctx->client, qctx->fname,
qctx->rdataset);
}
@ -7786,7 +7876,7 @@ query_respond(query_ctx_t *qctx) {
* We shouldn't ever fail to add 'rdataset'
* because it's already in the answer.
*/
INSIST(qctx->rdataset == NULL);
INSIST(qctx->rdataset == NULL || QUERY_ANSWERED(qctx->client));
query_addauth(qctx);
@ -11271,7 +11361,7 @@ ns_query_done(query_ctx_t *qctx) {
* If we're recursing then just return; the query will
* resume when recursion ends.
*/
if (RECURSING(qctx->client)) {
if (RECURSING(qctx->client) && !QUERY_STALEONLY(qctx->client)) {
return (qctx->result);
}
@ -11282,8 +11372,10 @@ ns_query_done(query_ctx_t *qctx) {
* to the AA bit if the auth-nxdomain config option
* says so, then render and send the response.
*/
query_setup_sortlist(qctx);
query_glueanswer(qctx);
if (!QUERY_ANSWERED(qctx->client)) {
query_setup_sortlist(qctx);
query_glueanswer(qctx);
}
if (qctx->client->message->rcode == dns_rcode_nxdomain &&
qctx->view->auth_nxdomain)