From b90bad93cb960e87fb858de4c1b4583cceb802c2 Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Thu, 20 Apr 2023 16:22:53 +0200 Subject: [PATCH] Fix serve-stale bug when cache has no data We recently fixed a bug where in some cases (when following an expired CNAME for example), named could return SERVFAIL if the target record is still valid (see isc-projects/bind9#3678, and isc-projects/bind9!7096). We fixed this by considering non-stale RRsets as well during the stale lookup. However, this triggered a new bug because despite the answer from cache not being stale, the lookup may be triggered by serve-stale. If the answer from database is not stale, the fix in isc-projects/bind9!7096 erroneously skips the serve-stale logic. Add 'answer_found' checks to the serve-stale logic to fix this issue. (cherry picked from commit bbd163acf67843c76099921e467dd0ef90f3f670) --- lib/ns/query.c | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/lib/ns/query.c b/lib/ns/query.c index 63f97b4ec1..1fab263a35 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5852,6 +5852,27 @@ query_refresh_rrset(query_ctx_t *orig_qctx) { qctx_destroy(&qctx); } +/*% + * Depending on the db lookup result, we can respond to the + * client this stale answer. + */ +static bool +stale_client_answer(isc_result_t result) { + switch (result) { + case ISC_R_SUCCESS: + case DNS_R_EMPTYNAME: + case DNS_R_NXRRSET: + case DNS_R_NCACHENXRRSET: + case DNS_R_CNAME: + case DNS_R_DNAME: + return (true); + default: + return (false); + } + + UNREACHABLE(); +} + /*% * Perform a local database lookup, in either an authoritative or * cache database. If unable to answer, call ns_query_done(); otherwise @@ -5983,7 +6004,6 @@ query_lookup(query_ctx_t *qctx) { { /* Found non-stale usable rdataset. */ answer_found = true; - goto gotanswer; } if (dbfind_stale || stale_refresh_window || stale_timeout) { @@ -6019,7 +6039,7 @@ query_lookup(query_ctx_t *qctx) { if (stale_found) { ns_client_extendederror(qctx->client, ede, "resolver failure"); - } else { + } else if (!answer_found) { /* * Resolver failure, no stale data, nothing more we * can do, return SERVFAIL. @@ -6042,7 +6062,7 @@ query_lookup(query_ctx_t *qctx) { ns_client_extendederror( qctx->client, ede, "query within stale refresh time window"); - } else { + } else if (!answer_found) { /* * During the stale refresh window explicitly do not try * to refresh the data, because a recent lookup failed. @@ -6052,7 +6072,7 @@ query_lookup(query_ctx_t *qctx) { } } else if (stale_timeout) { if ((qctx->options & DNS_GETDB_STALEFIRST) != 0) { - if (!stale_found) { + if (!stale_found && !answer_found) { /* * We have nothing useful in cache to return * immediately. @@ -6069,7 +6089,7 @@ query_lookup(query_ctx_t *qctx) { &qctx->client->query.fetch); } return (query_lookup(qctx)); - } else { + } else if (stale_client_answer(result)) { /* * Immediately return the stale answer, start a * resolver fetch to refresh the data in cache. @@ -6081,9 +6101,12 @@ query_lookup(query_ctx_t *qctx) { "refresh the RRset will still be made", namebuf); qctx->refresh_rrset = STALE(qctx->rdataset); - ns_client_extendederror( - qctx->client, ede, - "stale data prioritized over lookup"); + if (stale_found) { + ns_client_extendederror( + qctx->client, ede, + "stale data prioritized over " + "lookup"); + } } } else { /* @@ -6099,7 +6122,11 @@ query_lookup(query_ctx_t *qctx) { if (stale_found) { ns_client_extendederror(qctx->client, ede, "client timeout"); - } else { + } else if (!answer_found) { + return (result); + } + + if (!stale_client_answer(result)) { return (result); } @@ -6112,7 +6139,6 @@ query_lookup(query_ctx_t *qctx) { } } -gotanswer: if (stale_timeout && (answer_found || stale_found)) { /* * Mark RRsets that we are adding to the client message on a