From d939d2ecde5639d11acd6eac33a997b3e3c78b02 Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Fri, 2 Sep 2022 16:50:39 +0200 Subject: [PATCH] Only refresh RRset once Don't attempt to resolve DNS responses for intermediate results. This may create multiple refreshes and can cause a crash. One scenario is where for the query there is a CNAME and canonical answer in cache that are both stale. This will trigger a refresh of the RRsets because we encountered stale data and we prioritized it over the lookup. It will trigger a refresh of both RRsets. When we start recursing, it will detect a recursion loop because the recursion parameters will eventually be the same. In 'dns_resolver_destroyfetch' the sanity check fails, one of the callers did not get its event back before trying to destroy the fetch. Move the call to 'query_refresh_rrset' to 'ns_query_done', so that it is only called once per client request. Another scenario is where for the query there is a stale CNAME in the cache that points to a record that is also in cache but not stale. This will trigger a refresh of the RRset (because we encountered stale data and we prioritized it over the lookup). We mark RRsets that we add to the message with DNS_RDATASETATTR_STALE_ADDED to prevent adding a duplicate RRset when a stale lookup and a normal lookup conflict with each other. However, the other non-stale RRset when following a CNAME chain will be added to the message without setting that attribute, because it is not stale. This is a variant of the bug in #2594. The fix covered the same crash but for stale-answer-client-timeout > 0. Fix this by clearing all RRsets from the message before refreshing. This requires the refresh to happen after the query is send back to the client. --- lib/ns/include/ns/query.h | 1 + lib/ns/query.c | 42 ++++++++++++++++++++++++--------------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/lib/ns/include/ns/query.h b/lib/ns/include/ns/query.h index 843de665ea..c1c7b5e430 100644 --- a/lib/ns/include/ns/query.h +++ b/lib/ns/include/ns/query.h @@ -203,6 +203,7 @@ struct query_ctx { bool authoritative; /* authoritative query? */ bool want_restart; /* CNAME chain or other * restart needed */ + bool refresh_rrset; /* stale RRset refresh needed */ bool need_wildcardproof; /* wildcard proof needed */ bool nxrewrite; /* negative answer from RPZ */ bool findcoveringnsec; /* lookup covering NSEC */ diff --git a/lib/ns/query.c b/lib/ns/query.c index beeae0e653..dd2d2f2e0a 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5780,7 +5780,6 @@ query_lookup(query_ctx_t *qctx) { bool dbfind_stale = false; bool stale_timeout = false; bool stale_found = false; - bool refresh_rrset = false; bool stale_refresh_window = false; uint16_t ede = 0; @@ -5980,8 +5979,7 @@ query_lookup(query_ctx_t *qctx) { "%s stale answer used, an attempt to " "refresh the RRset will still be made", namebuf); - refresh_rrset = STALE(qctx->rdataset); - qctx->client->nodetach = refresh_rrset; + qctx->refresh_rrset = STALE(qctx->rdataset); ns_client_extendederror( qctx->client, ede, "stale data prioritized over lookup"); @@ -6025,17 +6023,6 @@ query_lookup(query_ctx_t *qctx) { result = query_gotanswer(qctx, result); - if (refresh_rrset) { - /* - * If we reached this point then it means that we have found a - * stale RRset entry in cache and BIND is configured to allow - * queries to be answered with stale data if no active RRset - * is available, i.e. "stale-anwer-client-timeout 0". But, we - * still need to refresh the RRset. - */ - query_refresh_rrset(qctx); - } - cleanup: return (result); } @@ -7987,11 +7974,14 @@ query_addanswer(query_ctx_t *qctx) { /* * On normal lookups, clear any rdatasets that were added on a - * lookup due to stale-answer-client-timeout. + * lookup due to stale-answer-client-timeout. Do not clear if we + * are going to refresh the RRset, because the stale contents are + * prioritized. */ if (QUERY_STALEOK(&qctx->client->query) && - !QUERY_STALETIMEOUT(&qctx->client->query)) + !QUERY_STALETIMEOUT(&qctx->client->query) && !qctx->refresh_rrset) { + CCTRACE(ISC_LOG_DEBUG(3), "query_clear_stale"); query_clear_stale(qctx->client); /* * We can clear the attribute to prevent redundant clearing @@ -11523,9 +11513,29 @@ ns_query_done(query_ctx_t *qctx) { /* * Client may have been detached after query_send(), so * we test and store the flag state here, for safety. + * If we are refreshing the RRSet, we must not detach from the client + * in the query_send(), so we need to override the flag. */ + if (qctx->refresh_rrset) { + qctx->client->nodetach = true; + } nodetach = qctx->client->nodetach; query_send(qctx->client); + + if (qctx->refresh_rrset) { + /* + * If we reached this point then it means that we have found a + * stale RRset entry in cache and BIND is configured to allow + * queries to be answered with stale data if no active RRset + * is available, i.e. "stale-anwer-client-timeout 0". But, we + * still need to refresh the RRset. To prevent adding duplicate + * RRsets, clear the RRsets from the message before doing the + * refresh. + */ + message_clearrdataset(qctx->client->message, 0); + query_refresh_rrset(qctx); + } + if (!nodetach) { qctx->detach_client = true; }