Only refresh RRset once

Don't attempt to resolve DNS responses for intermediate results. This may create multiple refreshes and can cause a crash. One scenario is where for the query there is a CNAME and canonical answer in cache that are both stale. This will trigger a refresh of the RRsets because we encountered stale data and we prioritized it over the lookup. It will trigger a refresh of both RRsets. When we start recursing, it will detect a recursion loop because the recursion parameters will eventually be the same. In 'dns_resolver_destroyfetch' the sanity check fails, one of the callers did not get its event back before trying to destroy the fetch. Move the call to 'query_refresh_rrset' to 'ns_query_done', so that it is only called once per client request. Another scenario is where for the query there is a stale CNAME in the cache that points to a record that is also in cache but not stale. This will trigger a refresh of the RRset (because we encountered stale data and we prioritized it over the lookup). We mark RRsets that we add to the message with DNS_RDATASETATTR_STALE_ADDED to prevent adding a duplicate RRset when a stale lookup and a normal lookup conflict with each other. However, the other non-stale RRset when following a CNAME chain will be added to the message without setting that attribute, because it is not stale. This is a variant of the bug in #2594. The fix covered the same crash but for stale-answer-client-timeout > 0. Fix this by clearing all RRsets from the message before refreshing. This requires the refresh to happen after the query is send back to the client. (cherry picked from commit d939d2ecde5639d11acd6eac33a997b3e3c78b02)
2025-08-30 14:07:59 +00:00 · 2022-09-02 16:50:39 +02:00 · 2022-09-02 16:50:39 +02:00 · b9e2f3333d
commit b9e2f3333d
parent df94f18583
2 changed files with 27 additions and 16 deletions
--- a/lib/ns/include/ns/query.h
+++ b/lib/ns/include/ns/query.h
@ -148,6 +148,7 @@ struct query_ctx {
 	bool authoritative;		    /* authoritative query? */
 	bool want_restart;		    /* CNAME chain or other
 					     * restart needed */
+	bool		refresh_rrset;	    /* stale RRset refresh needed */
 	bool		need_wildcardproof; /* wildcard proof needed */
 	bool		nxrewrite;	    /* negative answer from RPZ */
 	bool		findcoveringnsec;   /* lookup covering NSEC */
--- a/lib/ns/query.c
+++ b/lib/ns/query.c
@ -5829,7 +5829,6 @@ query_lookup(query_ctx_t *qctx) {
 	bool dbfind_stale = false;
 	bool stale_timeout = false;
 	bool stale_found = false;
-	bool refresh_rrset = false;
 	bool stale_refresh_window = false;
 	uint16_t ede = 0;

@ -6027,8 +6026,7 @@ query_lookup(query_ctx_t *qctx) {
 					"%s stale answer used, an attempt to "
 					"refresh the RRset will still be made",
 					namebuf);
-				refresh_rrset = STALE(qctx->rdataset);
-				qctx->client->nodetach = refresh_rrset;
+				qctx->refresh_rrset = STALE(qctx->rdataset);
 				ns_client_extendederror(
 					qctx->client, ede,
 					"stale data prioritized over lookup");
@ -6072,17 +6070,6 @@ query_lookup(query_ctx_t *qctx) {

 	result = query_gotanswer(qctx, result);

-	if (refresh_rrset) {
-		/*
-		 * If we reached this point then it means that we have found a
-		 * stale RRset entry in cache and BIND is configured to allow
-		 * queries to be answered with stale data if no active RRset
-		 * is available, i.e. "stale-anwer-client-timeout 0". But, we
-		 * still need to refresh the RRset.
-		 */
-		query_refresh_rrset(qctx);
-	}
-
 cleanup:
 	return (result);
 }
@ -8102,11 +8089,14 @@ query_addanswer(query_ctx_t *qctx) {

 	/*
 	 * On normal lookups, clear any rdatasets that were added on a
-	 * lookup due to stale-answer-client-timeout.
+	 * lookup due to stale-answer-client-timeout. Do not clear if we
+	 * are going to refresh the RRset, because the stale contents are
+	 * prioritized.
 	 */
 	if (QUERY_STALEOK(&qctx->client->query) &&
-	    !QUERY_STALETIMEOUT(&qctx->client->query))
+	    !QUERY_STALETIMEOUT(&qctx->client->query) && !qctx->refresh_rrset)
 	{
+		CCTRACE(ISC_LOG_DEBUG(3), "query_clear_stale");
 		query_clear_stale(qctx->client);
 		/*
 		 * We can clear the attribute to prevent redundant clearing
@ -11864,9 +11854,29 @@ ns_query_done(query_ctx_t *qctx) {
 	/*
 	 * Client may have been detached after query_send(), so
 	 * we test and store the flag state here, for safety.
+	 * If we are refreshing the RRSet, we must not detach from the client
+	 * in the query_send(), so we need to override the flag.
 	 */
+	if (qctx->refresh_rrset) {
+		qctx->client->nodetach = true;
+	}
 	nodetach = qctx->client->nodetach;
 	query_send(qctx->client);
+
+	if (qctx->refresh_rrset) {
+		/*
+		 * If we reached this point then it means that we have found a
+		 * stale RRset entry in cache and BIND is configured to allow
+		 * queries to be answered with stale data if no active RRset
+		 * is available, i.e. "stale-anwer-client-timeout 0". But, we
+		 * still need to refresh the RRset. To prevent adding duplicate
+		 * RRsets, clear the RRsets from the message before doing the
+		 * refresh.
+		 */
+		message_clearrdataset(qctx->client->message, 0);
+		query_refresh_rrset(qctx);
+	}
+
 	if (!nodetach) {
 		qctx->detach_client = true;
 	}