From 5fb8e555bc115b8be94bf08404e65ac7d12b37a3 Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Wed, 28 Sep 2022 15:13:11 +0200 Subject: [PATCH 1/3] Add new recursion type for refreshing stale RRset Refreshing a stale RRset is similar to a prefetch query, so we can refactor this code to use the new recursion types introduced in !5883. --- lib/ns/include/ns/query.h | 7 +++++++ lib/ns/query.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/lib/ns/include/ns/query.h b/lib/ns/include/ns/query.h index c1c7b5e430..6413326bad 100644 --- a/lib/ns/include/ns/query.h +++ b/lib/ns/include/ns/query.h @@ -46,6 +46,7 @@ typedef enum { RECTYPE_NORMAL, RECTYPE_PREFETCH, RECTYPE_RPZ, + RECTYPE_STALE_REFRESH, RECTYPE_HOOK, RECTYPE_COUNT, } ns_query_rectype_t; @@ -60,6 +61,8 @@ typedef enum { ((client)->query.recursions[RECTYPE_PREFETCH].handle) #define HANDLE_RECTYPE_RPZ(client) \ ((client)->query.recursions[RECTYPE_RPZ].handle) +#define HANDLE_RECTYPE_STALE_REFRESH(client) \ + ((client)->query.recursions[RECTYPE_STALE_REFRESH].handle) #define HANDLE_RECTYPE_HOOK(client) \ ((client)->query.recursions[RECTYPE_HOOK].handle) @@ -73,6 +76,8 @@ typedef enum { ((client)->query.recursions[RECTYPE_PREFETCH].fetch) #define FETCH_RECTYPE_RPZ(client) \ ((client)->query.recursions[RECTYPE_RPZ].fetch) +#define FETCH_RECTYPE_STALE_REFRESH(client) \ + ((client)->query.recursions[RECTYPE_STALE_REFRESH].fetch) #define FETCH_RECTYPE_HOOK(client) \ ((client)->query.recursions[RECTYPE_HOOK].fetch) @@ -86,6 +91,8 @@ typedef enum { ((client)->query.recursions[RECTYPE_PREFETCH].quota) #define QUOTA_RECTYPE_RPZ(client) \ ((client)->query.recursions[RECTYPE_RPZ].quota) +#define QUOTA_RECTYPE_STALE_REFRESH(client) \ + ((client)->query.recursions[RECTYPE_STALE_REFRESH].quota) #define QUOTA_RECTYPE_HOOK(client) \ ((client)->query.recursions[RECTYPE_HOOK].quota) diff --git a/lib/ns/query.c b/lib/ns/query.c index ebdd73ee31..8039d0f439 100644 --- 
a/lib/ns/query.c +++ b/lib/ns/query.c @@ -2523,7 +2523,7 @@ recursionquotatype_detach(ns_client_t *client, } static void -cleanup_after_fetch(isc_task_t *task, isc_event_t *event, +cleanup_after_fetch(isc_task_t *task, isc_event_t *event, const char *ctracestr, ns_query_rectype_t recursion_type) { dns_fetchevent_t *devent = (dns_fetchevent_t *)event; isc_nmhandle_t **handlep; @@ -2537,7 +2537,7 @@ cleanup_after_fetch(isc_task_t *task, isc_event_t *event, REQUIRE(NS_CLIENT_VALID(client)); REQUIRE(task == client->manager->task); - CTRACE(ISC_LOG_DEBUG(3), "prefetch_done"); + CTRACE(ISC_LOG_DEBUG(3), ctracestr); handlep = &client->query.recursions[recursion_type].handle; fetchp = &client->query.recursions[recursion_type].fetch; @@ -2557,12 +2557,17 @@ cleanup_after_fetch(isc_task_t *task, isc_event_t *event, static void prefetch_done(isc_task_t *task, isc_event_t *event) { - cleanup_after_fetch(task, event, RECTYPE_PREFETCH); + cleanup_after_fetch(task, event, "prefetch_done", RECTYPE_PREFETCH); } static void rpzfetch_done(isc_task_t *task, isc_event_t *event) { - cleanup_after_fetch(task, event, RECTYPE_RPZ); + cleanup_after_fetch(task, event, "rpzfetch_done", RECTYPE_RPZ); +} + +static void +refresh_done(isc_task_t *task, isc_event_t *event) { + cleanup_after_fetch(task, event, "refresh_done", RECTYPE_REFRESH); } /* @@ -2606,6 +2611,10 @@ fetch_and_forget(ns_client_t *client, dns_name_t *qname, dns_rdatatype_t qtype, options = client->query.fetchoptions; action = rpzfetch_done; break; + case RECTYPE_REFRESH: + options = client->query.fetchoptions; + action = refresh_done; + break; default: UNREACHABLE(); } @@ -2646,6 +2655,20 @@ query_prefetch(ns_client_t *client, dns_name_t *qname, ns_statscounter_prefetch); } +/* +static void +query_refresh(ns_client_t *client, dns_name_t *qname, + dns_rdataset_t *rdataset) { + CTRACE(ISC_LOG_DEBUG(3), "query_refresh"); + + if (FETCH_RECTYPE_REFRESH(client) != NULL) { + return; + } + + fetch_and_forget(client, qname, 
rdataset->type, RECTYPE_REFRESH); +} +*/ + static void rpz_clean(dns_zone_t **zonep, dns_db_t **dbp, dns_dbnode_t **nodep, dns_rdataset_t **rdatasetp) { From 64d51285d54ed027b8f0fe74844715d3f1e638b5 Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Thu, 29 Sep 2022 09:57:46 +0200 Subject: [PATCH 2/3] Reuse recursion type code for refresh stale RRset Refreshing a stale RRset is similar to prefetching an RRset, so reuse the existing code. When refreshing an RRset we need to clear all db options related to serve-stale so that stale RRsets in cache are ignored during the refresh. We no longer need to set the "nodetach" flag, because the refresh fetch is now a "fetch and forget". So we can detach from the client in the query_send(). This code will break some serve-stale test cases, this will be fixed in the successor commit. TODO: add explanation why the serve-stale test cases fail. --- lib/ns/query.c | 110 +++++++++++-------------------------------------- 1 file changed, 24 insertions(+), 86 deletions(-) diff --git a/lib/ns/query.c b/lib/ns/query.c index 8039d0f439..6cb95f676b 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -2543,6 +2543,7 @@ cleanup_after_fetch(isc_task_t *task, isc_event_t *event, const char *ctracestr, fetchp = &client->query.recursions[recursion_type].fetch; LOCK(&client->query.fetchlock); + if (*fetchp != NULL) { INSIST(devent->fetch == *fetchp); *fetchp = NULL; @@ -2550,7 +2551,6 @@ cleanup_after_fetch(isc_task_t *task, isc_event_t *event, const char *ctracestr, UNLOCK(&client->query.fetchlock); recursionquotatype_detach(client, recursion_type); - free_devent(client, &event, &devent); isc_nmhandle_detach(handlep); } @@ -2566,8 +2566,9 @@ rpzfetch_done(isc_task_t *task, isc_event_t *event) { } static void -refresh_done(isc_task_t *task, isc_event_t *event) { - cleanup_after_fetch(task, event, "refresh_done", RECTYPE_REFRESH); +stale_refresh_done(isc_task_t *task, isc_event_t *event) { + cleanup_after_fetch(task, event, 
"stale_refresh_done", + RECTYPE_STALE_REFRESH); } /* @@ -2611,9 +2612,9 @@ fetch_and_forget(ns_client_t *client, dns_name_t *qname, dns_rdatatype_t qtype, options = client->query.fetchoptions; action = rpzfetch_done; break; - case RECTYPE_REFRESH: + case RECTYPE_STALE_REFRESH: options = client->query.fetchoptions; - action = refresh_done; + action = stale_refresh_done; break; default: UNREACHABLE(); @@ -2655,19 +2656,29 @@ query_prefetch(ns_client_t *client, dns_name_t *qname, ns_statscounter_prefetch); } -/* static void -query_refresh(ns_client_t *client, dns_name_t *qname, - dns_rdataset_t *rdataset) { - CTRACE(ISC_LOG_DEBUG(3), "query_refresh"); +query_stale_refresh(ns_client_t *client) { + dns_name_t *qname; - if (FETCH_RECTYPE_REFRESH(client) != NULL) { + CTRACE(ISC_LOG_DEBUG(3), "query_stale_refresh"); + + if (FETCH_RECTYPE_STALE_REFRESH(client) != NULL) { return; } - fetch_and_forget(client, qname, rdataset->type, RECTYPE_REFRESH); + client->query.dboptions &= ~(DNS_DBFIND_STALETIMEOUT | + DNS_DBFIND_STALEOK | + DNS_DBFIND_STALEENABLED); + + if (client->query.origqname != NULL) { + qname = client->query.origqname; + } else { + qname = client->query.qname; + } + + fetch_and_forget(client, qname, client->query.qtype, + RECTYPE_STALE_REFRESH); } -*/ static void rpz_clean(dns_zone_t **zonep, dns_db_t **dbp, dns_dbnode_t **nodep, @@ -5225,26 +5236,6 @@ qctx_init(ns_client_t *client, dns_fetchevent_t **eventp, dns_rdatatype_t qtype, CALL_HOOK_NORETURN(NS_QUERY_QCTX_INITIALIZED, qctx); } -/* - * Make 'dst' and exact copy of 'src', with exception of the - * option field, which is reset to zero. - * This function also attaches dst's view and db to the src's - * view and cachedb. 
- */ -static void -qctx_copy(const query_ctx_t *qctx, query_ctx_t *dst) { - REQUIRE(qctx != NULL); - REQUIRE(dst != NULL); - - memmove(dst, qctx, sizeof(*dst)); - dst->view = NULL; - dst->db = NULL; - dst->options = 0; - dns_view_attach(qctx->view, &dst->view); - dns_db_attach(qctx->view->cachedb, &dst->db); - CCTRACE(ISC_LOG_DEBUG(3), "qctx_copy"); -} - /*% * Clean up and disassociate the rdataset and node pointers in qctx. */ @@ -5749,54 +5740,6 @@ qctx_prepare_buffers(query_ctx_t *qctx, isc_buffer_t *buffer) { return (ISC_R_SUCCESS); } -/* - * Setup a new query context for resolving a query. - * - * This function is only called if both these conditions are met: - * 1. BIND is configured with stale-answer-client-timeout 0. - * 2. A stale RRset is found in cache during initial query - * database lookup. - * - * We continue with this function for refreshing/resolving an RRset - * after answering a client with stale data. - */ -static void -query_refresh_rrset(query_ctx_t *orig_qctx) { - isc_buffer_t buffer; - query_ctx_t qctx; - - REQUIRE(orig_qctx != NULL); - REQUIRE(orig_qctx->client != NULL); - - qctx_copy(orig_qctx, &qctx); - qctx.client->query.dboptions &= ~(DNS_DBFIND_STALETIMEOUT | - DNS_DBFIND_STALEOK | - DNS_DBFIND_STALEENABLED); - - /* - * We'll need some resources... - */ - if (qctx_prepare_buffers(&qctx, &buffer) != ISC_R_SUCCESS) { - dns_db_detach(&qctx.db); - qctx_destroy(&qctx); - return; - } - - /* - * Pretend we didn't find anything in cache. - */ - (void)query_gotanswer(&qctx, ISC_R_NOTFOUND); - - if (qctx.fname != NULL) { - ns_client_releasename(qctx.client, &qctx.fname); - } - if (qctx.rdataset != NULL) { - ns_client_putrdataset(qctx.client, &qctx.rdataset); - } - - qctx_destroy(&qctx); -} - /*% * Perform a local database lookup, in either an authoritative or * cache database. 
If unable to answer, call ns_query_done(); otherwise @@ -11548,12 +11491,7 @@ ns_query_done(query_ctx_t *qctx) { /* * Client may have been detached after query_send(), so * we test and store the flag state here, for safety. - * If we are refreshing the RRSet, we must not detach from the client - * in the query_send(), so we need to override the flag. */ - if (qctx->refresh_rrset) { - qctx->client->nodetach = true; - } nodetach = qctx->client->nodetach; query_send(qctx->client); @@ -11568,7 +11506,7 @@ ns_query_done(query_ctx_t *qctx) { * refresh. */ message_clearrdataset(qctx->client->message, 0); - query_refresh_rrset(qctx); + query_stale_refresh(qctx->client); } if (!nodetach) { From 0681b152259dfd9f37579043ab5c99ec9760ee2f Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Fri, 30 Sep 2022 11:16:22 +0200 Subject: [PATCH 3/3] If refresh stale RRset times out, start stale-refresh-time The previous commit failed some tests because we expect that if a fetch fails and we have stale candidates in cache, the stale-refresh-time window is started. This means that if we hit a stale entry in cache and answering stale data is allowed, we don't bother resolving it again for as long as we are within the stale-refresh-time window. This is useful for two reasons: - If we failed to fetch the RRset that we are looking for, we are not hammering the authoritative servers. - Successor clients don't need to wait for stale-answer-client-timeout to get their DNS response; only the first one to query will take the latency penalty. The latter is not useful when stale-answer-client-timeout is 0 though. So this exception code exists only to make sure we don't try to refresh the RRset again if it failed to do so recently.
--- bin/tests/system/serve-stale/tests.sh | 2 +- lib/ns/query.c | 98 ++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index d38bc7ef64..0b15553398 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -2062,7 +2062,7 @@ status=$((status+ret)) n=$((n+1)) ret=0 echo_i "wait until resolver query times out, activating stale-refresh-time" -wait_for_log 15 "data.example resolver failure, stale answer used" ns3/named.run || ret=1 +wait_for_log 15 "data.example/TXT stale refresh failed: timed out" ns3/named.run || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) diff --git a/lib/ns/query.c b/lib/ns/query.c index 6cb95f676b..53c2b9eacb 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -398,6 +398,15 @@ static void qctx_init(ns_client_t *client, dns_fetchevent_t **eventp, dns_rdatatype_t qtype, query_ctx_t *qctx); +static isc_result_t +qctx_prepare_buffers(query_ctx_t *qctx, isc_buffer_t *buffer); + +static void +qctx_freedata(query_ctx_t *qctx); + +static void +qctx_destroy(query_ctx_t *qctx); + static isc_result_t query_setup(ns_client_t *client, dns_rdatatype_t qtype); @@ -2522,6 +2531,87 @@ recursionquotatype_detach(ns_client_t *client, ns_statscounter_recursclients); } +static void +stale_refresh_aftermath(ns_client_t *client, isc_result_t result) { + dns_db_t *db = NULL; + unsigned int dboptions; + isc_buffer_t buffer; + query_ctx_t qctx; + dns_clientinfomethods_t cm; + dns_clientinfo_t ci; + char namebuf[DNS_NAME_FORMATSIZE]; + char typebuf[DNS_RDATATYPE_FORMATSIZE]; + + /* + * If refreshing a stale RRset failed, we need to set the + * stale-refresh-time window, so that on future requests for this + * RRset the stale entry may be used immediately. 
+ */ + switch (result) { + case ISC_R_SUCCESS: + case DNS_R_GLUE: + case DNS_R_ZONECUT: + case ISC_R_NOTFOUND: + case DNS_R_DELEGATION: + case DNS_R_EMPTYNAME: + case DNS_R_NXRRSET: + case DNS_R_EMPTYWILD: + case DNS_R_NXDOMAIN: + case DNS_R_COVERINGNSEC: + case DNS_R_NCACHENXDOMAIN: + case DNS_R_NCACHENXRRSET: + case DNS_R_CNAME: + case DNS_R_DNAME: + break; + default: + dns_name_format(client->query.qname, namebuf, sizeof(namebuf)); + dns_rdatatype_format(client->query.qtype, typebuf, + sizeof(typebuf)); + ns_client_log(client, NS_LOGCATEGORY_SERVE_STALE, + NS_LOGMODULE_QUERY, ISC_LOG_NOTICE, + "%s/%s stale refresh failed: timed out", namebuf, + typebuf); + + /* + * Set up a short lived query context, solely to set the + * last refresh failure time on the RRset in the cache + * database, starting the stale-refresh-time window for it. + * This is a condensed form of query_lookup(). + */ + isc_stdtime_get(&client->now); + client->query.attributes &= ~NS_QUERYATTR_RECURSIONOK; + qctx_init(client, NULL, 0, &qctx); + + dns_clientinfomethods_init(&cm, ns_client_sourceip); + dns_clientinfo_init( + &ci, qctx.client, + HAVEECS(qctx.client) ? 
&qctx.client->ecs : NULL, NULL); + + result = qctx_prepare_buffers(&qctx, &buffer); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + + dboptions = qctx.client->query.dboptions; + dboptions |= DNS_DBFIND_STALEOK; + dboptions |= DNS_DBFIND_STALESTART; + + dns_db_attach(qctx.client->view->cachedb, &db); + (void)dns_db_findext(db, qctx.client->query.qname, NULL, + qctx.client->query.qtype, dboptions, + qctx.client->now, &qctx.node, qctx.fname, + &cm, &ci, qctx.rdataset, qctx.sigrdataset); + if (qctx.node != NULL) { + dns_db_detachnode(db, &qctx.node); + } + dns_db_detach(&db); + + cleanup: + qctx_freedata(&qctx); + qctx_destroy(&qctx); + } +} + static void cleanup_after_fetch(isc_task_t *task, isc_event_t *event, const char *ctracestr, ns_query_rectype_t recursion_type) { @@ -2529,6 +2619,7 @@ cleanup_after_fetch(isc_task_t *task, isc_event_t *event, const char *ctracestr, isc_nmhandle_t **handlep; dns_fetch_t **fetchp; ns_client_t *client; + isc_result_t result; UNUSED(task); @@ -2541,15 +2632,20 @@ cleanup_after_fetch(isc_task_t *task, isc_event_t *event, const char *ctracestr, handlep = &client->query.recursions[recursion_type].handle; fetchp = &client->query.recursions[recursion_type].fetch; + result = devent->result; LOCK(&client->query.fetchlock); - if (*fetchp != NULL) { INSIST(devent->fetch == *fetchp); *fetchp = NULL; } UNLOCK(&client->query.fetchlock); + /* Some type of recursions require a bit of aftermath. */ + if (recursion_type == RECTYPE_STALE_REFRESH) { + stale_refresh_aftermath(client, result); + } + recursionquotatype_detach(client, recursion_type); free_devent(client, &event, &devent); isc_nmhandle_detach(handlep);