mirror of
https://gitlab.isc.org/isc-projects/bind9
synced 2025-09-03 08:05:21 +00:00
chg: usr: Improve the LRU cache-expiration mechanism
Replace the LRU cache-expiration mechanism with a SIEVE-LRU based mechanism that triggers when the cache is close to the `max-cache-size` limit. This improves recursive server performance. Merge branch 'ondrej/sieve' into 'main'. See merge request isc-projects/bind9!10153.
This commit is contained in:
@@ -85,14 +85,13 @@ struct dns_slabheader {
|
|||||||
* when the "cyclic" rrset-order is required.
|
* when the "cyclic" rrset-order is required.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
unsigned int resign_lsb : 1;
|
/* resigning (zone) and TTL-cleaning (cache) */
|
||||||
|
uint16_t resign_lsb : 1;
|
||||||
isc_stdtime_t resign;
|
isc_stdtime_t resign;
|
||||||
|
isc_heap_t *heap;
|
||||||
unsigned int heap_index;
|
unsigned int heap_index;
|
||||||
/*%<
|
|
||||||
* Used for TTL-based cache cleaning.
|
|
||||||
*/
|
|
||||||
|
|
||||||
isc_stdtime_t last_used;
|
/* Used for stale refresh */
|
||||||
_Atomic(uint32_t) last_refresh_fail_ts;
|
_Atomic(uint32_t) last_refresh_fail_ts;
|
||||||
|
|
||||||
dns_slabheader_proof_t *noqname;
|
dns_slabheader_proof_t *noqname;
|
||||||
@@ -127,7 +126,12 @@ struct dns_slabheader {
|
|||||||
* this rdataset, if any.
|
* this rdataset, if any.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
dns_gluelist_t *gluelist;
|
||||||
|
|
||||||
|
/*% Used for SIEVE-LRU (cache) and changed_list (zone) */
|
||||||
ISC_LINK(struct dns_slabheader) link;
|
ISC_LINK(struct dns_slabheader) link;
|
||||||
|
/*% Used for SIEVE-LRU */
|
||||||
|
bool visited;
|
||||||
|
|
||||||
/*%
|
/*%
|
||||||
* Case vector. If the bit is set then the corresponding
|
* Case vector. If the bit is set then the corresponding
|
||||||
@@ -135,10 +139,6 @@ struct dns_slabheader {
|
|||||||
* rendering that character upper case.
|
* rendering that character upper case.
|
||||||
*/
|
*/
|
||||||
unsigned char upper[32];
|
unsigned char upper[32];
|
||||||
|
|
||||||
isc_heap_t *heap;
|
|
||||||
|
|
||||||
dns_gluelist_t *gluelist;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
@@ -16,7 +16,6 @@
|
|||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include <stdalign.h>
|
#include <stdalign.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <sys/mman.h>
|
|
||||||
|
|
||||||
#include <isc/ascii.h>
|
#include <isc/ascii.h>
|
||||||
#include <isc/async.h>
|
#include <isc/async.h>
|
||||||
@@ -34,6 +33,7 @@
|
|||||||
#include <isc/refcount.h>
|
#include <isc/refcount.h>
|
||||||
#include <isc/result.h>
|
#include <isc/result.h>
|
||||||
#include <isc/rwlock.h>
|
#include <isc/rwlock.h>
|
||||||
|
#include <isc/sieve.h>
|
||||||
#include <isc/stdio.h>
|
#include <isc/stdio.h>
|
||||||
#include <isc/string.h>
|
#include <isc/string.h>
|
||||||
#include <isc/time.h>
|
#include <isc/time.h>
|
||||||
@@ -127,20 +127,6 @@
|
|||||||
*/
|
*/
|
||||||
#define QPDB_VIRTUAL 300
|
#define QPDB_VIRTUAL 300
|
||||||
|
|
||||||
/*%
|
|
||||||
* Whether to rate-limit updating the LRU to avoid possible thread contention.
|
|
||||||
* Updating LRU requires write locking, so we don't do it every time the
|
|
||||||
* record is touched - only after some time passes.
|
|
||||||
*/
|
|
||||||
#ifndef DNS_QPDB_LIMITLRUUPDATE
|
|
||||||
#define DNS_QPDB_LIMITLRUUPDATE 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*% Time after which we update LRU for glue records, 5 minutes */
|
|
||||||
#define DNS_QPDB_LRUUPDATE_GLUE 300
|
|
||||||
/*% Time after which we update LRU for all other records, 10 minutes */
|
|
||||||
#define DNS_QPDB_LRUUPDATE_REGULAR 600
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This defines the number of headers that we try to expire each time the
|
* This defines the number of headers that we try to expire each time the
|
||||||
* expire_ttl_headers() is run. The number should be small enough, so the
|
* expire_ttl_headers() is run. The number should be small enough, so the
|
||||||
@@ -150,7 +136,8 @@
|
|||||||
#define DNS_QPDB_EXPIRE_TTL_COUNT 10
|
#define DNS_QPDB_EXPIRE_TTL_COUNT 10
|
||||||
|
|
||||||
/*%
|
/*%
|
||||||
* This is the structure that is used for each node in the qp trie of trees.
|
* This is the structure that is used for each node in the qp trie of
|
||||||
|
* trees.
|
||||||
*/
|
*/
|
||||||
typedef struct qpcnode qpcnode_t;
|
typedef struct qpcnode qpcnode_t;
|
||||||
struct qpcnode {
|
struct qpcnode {
|
||||||
@@ -224,11 +211,6 @@ typedef struct qpcache_bucket {
|
|||||||
/* Per-bucket lock. */
|
/* Per-bucket lock. */
|
||||||
isc_rwlock_t lock;
|
isc_rwlock_t lock;
|
||||||
|
|
||||||
/*
|
|
||||||
* Linked list used to implement LRU cache cleaning.
|
|
||||||
*/
|
|
||||||
dns_slabheaderlist_t lru;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The heap is used for TTL based expiry. Note that qpcache->hmctx
|
* The heap is used for TTL based expiry. Note that qpcache->hmctx
|
||||||
* is the memory context to use for heap memory; this differs from
|
* is the memory context to use for heap memory; this differs from
|
||||||
@@ -236,10 +218,14 @@ typedef struct qpcache_bucket {
|
|||||||
*/
|
*/
|
||||||
isc_heap_t *heap;
|
isc_heap_t *heap;
|
||||||
|
|
||||||
|
/* SIEVE-LRU cache cleaning state. */
|
||||||
|
ISC_SIEVE(dns_slabheader_t) sieve;
|
||||||
|
|
||||||
/* Padding to prevent false sharing between locks. */
|
/* Padding to prevent false sharing between locks. */
|
||||||
uint8_t __padding[ISC_OS_CACHELINE_SIZE -
|
uint8_t __padding[ISC_OS_CACHELINE_SIZE -
|
||||||
(sizeof(isc_queue_t) + sizeof(isc_rwlock_t) +
|
(sizeof(isc_queue_t) + sizeof(isc_rwlock_t) +
|
||||||
sizeof(dns_slabheaderlist_t) + sizeof(isc_heap_t *)) %
|
sizeof(isc_heap_t *) +
|
||||||
|
sizeof(ISC_SIEVE(dns_slabheader_t))) %
|
||||||
ISC_OS_CACHELINE_SIZE];
|
ISC_OS_CACHELINE_SIZE];
|
||||||
|
|
||||||
} qpcache_bucket_t;
|
} qpcache_bucket_t;
|
||||||
@@ -285,17 +271,6 @@ struct qpcache {
|
|||||||
*/
|
*/
|
||||||
uint32_t serve_stale_refresh;
|
uint32_t serve_stale_refresh;
|
||||||
|
|
||||||
/*
|
|
||||||
* Start point % node_lock_count for next LRU cleanup.
|
|
||||||
*/
|
|
||||||
atomic_uint lru_sweep;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When performing LRU cleaning limit cleaning to headers that were
|
|
||||||
* last used at or before this.
|
|
||||||
*/
|
|
||||||
_Atomic(isc_stdtime_t) last_used;
|
|
||||||
|
|
||||||
/* Locked by tree_lock. */
|
/* Locked by tree_lock. */
|
||||||
dns_qp_t *tree;
|
dns_qp_t *tree;
|
||||||
dns_qp_t *nsec;
|
dns_qp_t *nsec;
|
||||||
@@ -457,6 +432,9 @@ qpcache__destroy(qpcache_t *qpdb);
|
|||||||
|
|
||||||
static dns_dbmethods_t qpdb_cachemethods;
|
static dns_dbmethods_t qpdb_cachemethods;
|
||||||
|
|
||||||
|
static void
|
||||||
|
cleanup_deadnodes_cb(void *arg);
|
||||||
|
|
||||||
/*%
|
/*%
|
||||||
* 'init_count' is used to initialize 'newheader->count' which in turn
|
* 'init_count' is used to initialize 'newheader->count' which in turn
|
||||||
* is used to determine where in the cycle rrset-order cyclic starts.
|
* is used to determine where in the cycle rrset-order cyclic starts.
|
||||||
@@ -480,116 +458,84 @@ static atomic_uint_fast16_t init_count = 0;
|
|||||||
* Failure to follow this hierarchy can result in deadlock.
|
* Failure to follow this hierarchy can result in deadlock.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*%
|
|
||||||
* Routines for LRU-based cache management.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*%
|
|
||||||
* See if a given cache entry that is being reused needs to be updated
|
|
||||||
* in the LRU-list. From the LRU management point of view, this function is
|
|
||||||
* expected to return true for almost all cases. When used with threads,
|
|
||||||
* however, this may cause a non-negligible performance penalty because a
|
|
||||||
* writer lock will have to be acquired before updating the list.
|
|
||||||
* If DNS_QPDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
|
|
||||||
* function returns true if the entry has not been updated for some period of
|
|
||||||
* time. We differentiate the NS or glue address case and the others since
|
|
||||||
* experiments have shown that the former tends to be accessed relatively
|
|
||||||
* infrequently and the cost of a cache miss is higher (e.g., a missing NS record
|
|
||||||
* may cause external queries at a higher level zone, involving more
|
|
||||||
* transactions).
|
|
||||||
*
|
|
||||||
* Caller must hold the node (read or write) lock.
|
|
||||||
*/
|
|
||||||
static bool
|
|
||||||
need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) {
|
|
||||||
if (DNS_SLABHEADER_GETATTR(header, (DNS_SLABHEADERATTR_NONEXISTENT |
|
|
||||||
DNS_SLABHEADERATTR_ANCIENT |
|
|
||||||
DNS_SLABHEADERATTR_ZEROTTL)) != 0)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if DNS_QPDB_LIMITLRUUPDATE
|
|
||||||
if (header->type == dns_rdatatype_ns ||
|
|
||||||
(header->trust == dns_trust_glue &&
|
|
||||||
dns_rdatatype_isaddr(header->type)))
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Glue records are updated if at least DNS_QPDB_LRUUPDATE_GLUE
|
|
||||||
* seconds have passed since the previous update time.
|
|
||||||
*/
|
|
||||||
return header->last_used + DNS_QPDB_LRUUPDATE_GLUE <= now;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Other records are updated if DNS_QPDB_LRUUPDATE_REGULAR seconds
|
|
||||||
* have passed.
|
|
||||||
*/
|
|
||||||
return header->last_used + DNS_QPDB_LRUUPDATE_REGULAR <= now;
|
|
||||||
#else
|
|
||||||
UNUSED(now);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
#endif /* if DNS_QPDB_LIMITLRUUPDATE */
|
|
||||||
}
|
|
||||||
|
|
||||||
/*%
|
|
||||||
* Update the timestamp of a given cache entry and move it to the head
|
|
||||||
* of the corresponding LRU list.
|
|
||||||
*
|
|
||||||
* Caller must hold the node (write) lock.
|
|
||||||
*
|
|
||||||
* Note that we do NOT touch the heap here, as the TTL has not changed.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
update_header(qpcache_t *qpdb, dns_slabheader_t *header, isc_stdtime_t now) {
|
|
||||||
/* To be checked: can we really assume this? XXXMLG */
|
|
||||||
INSIST(ISC_LINK_LINKED(header, link));
|
|
||||||
|
|
||||||
ISC_LIST_UNLINK(qpdb->buckets[HEADERNODE(header)->locknum].lru, header,
|
|
||||||
link);
|
|
||||||
header->last_used = now;
|
|
||||||
ISC_LIST_PREPEND(qpdb->buckets[HEADERNODE(header)->locknum].lru, header,
|
|
||||||
link);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
maybe_update_headers(qpcache_t *qpdb, dns_slabheader_t *found,
|
|
||||||
dns_slabheader_t *foundsig, isc_rwlock_t *nlock,
|
|
||||||
isc_rwlocktype_t *nlocktypep, isc_stdtime_t now) {
|
|
||||||
if (need_headerupdate(found, now) ||
|
|
||||||
(foundsig != NULL && need_headerupdate(foundsig, now)))
|
|
||||||
{
|
|
||||||
if (*nlocktypep != isc_rwlocktype_write) {
|
|
||||||
NODE_FORCEUPGRADE(nlock, nlocktypep);
|
|
||||||
}
|
|
||||||
if (need_headerupdate(found, now)) {
|
|
||||||
update_header(qpdb, found, now);
|
|
||||||
}
|
|
||||||
if (foundsig != NULL && need_headerupdate(foundsig, now)) {
|
|
||||||
update_header(qpdb, foundsig, now);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Locking:
|
* Cache-eviction routines.
|
||||||
* If a routine is going to lock more than one lock in this module, then
|
|
||||||
* the locking must be done in the following order:
|
|
||||||
*
|
|
||||||
* Tree Lock
|
|
||||||
*
|
|
||||||
* Node Lock (Only one from the set may be locked at one time by
|
|
||||||
* any caller)
|
|
||||||
*
|
|
||||||
* Database Lock
|
|
||||||
*
|
|
||||||
* Failure to follow this hierarchy can result in deadlock.
|
|
||||||
*
|
|
||||||
* Deleting Nodes:
|
|
||||||
* For zone databases the node for the origin of the zone MUST NOT be deleted.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
expireheader(dns_slabheader_t *header, isc_rwlocktype_t *nlocktypep,
|
||||||
|
isc_rwlocktype_t *tlocktypep, dns_expire_t reason DNS__DB_FLARG);
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
rdataset_size(dns_slabheader_t *header) {
|
||||||
|
if (EXISTS(header)) {
|
||||||
|
return dns_rdataslab_size(header);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sizeof(*header);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
expire_lru_headers(qpcache_t *qpdb, uint32_t idx, size_t requested,
|
||||||
|
isc_rwlocktype_t *nlocktypep,
|
||||||
|
isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
|
||||||
|
size_t expired = 0;
|
||||||
|
|
||||||
|
do {
|
||||||
|
dns_slabheader_t *header =
|
||||||
|
ISC_SIEVE_NEXT(qpdb->buckets[idx].sieve, visited, link);
|
||||||
|
if (header == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ISC_SIEVE_UNLINK(qpdb->buckets[idx].sieve, header, link);
|
||||||
|
|
||||||
|
expired += rdataset_size(header);
|
||||||
|
|
||||||
|
expireheader(header, nlocktypep, tlocktypep,
|
||||||
|
dns_expire_lru DNS__DB_FLARG_PASS);
|
||||||
|
} while (expired < requested);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
qpcache_miss(qpcache_t *qpdb, dns_slabheader_t *newheader,
|
||||||
|
isc_rwlocktype_t *nlocktypep,
|
||||||
|
isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
|
||||||
|
uint32_t idx = HEADERNODE(newheader)->locknum;
|
||||||
|
|
||||||
|
isc_heap_insert(qpdb->buckets[idx].heap, newheader);
|
||||||
|
newheader->heap = qpdb->buckets[idx].heap;
|
||||||
|
|
||||||
|
if (isc_mem_isovermem(qpdb->common.mctx)) {
|
||||||
|
/*
|
||||||
|
* Maximum estimated size of the data being added: The size
|
||||||
|
* of the rdataset, plus a new QP database node and nodename,
|
||||||
|
* and a possible additional NSEC node and nodename. Also add
|
||||||
|
* a 12k margin for a possible QP-trie chunk allocation.
|
||||||
|
* (It's okay to overestimate, we want to get cache memory
|
||||||
|
* down quickly.)
|
||||||
|
*/
|
||||||
|
|
||||||
|
size_t purgesize =
|
||||||
|
2 * (sizeof(qpcnode_t) +
|
||||||
|
dns_name_size(&HEADERNODE(newheader)->name)) +
|
||||||
|
rdataset_size(newheader) + 12288;
|
||||||
|
|
||||||
|
expire_lru_headers(qpdb, idx, purgesize, nlocktypep,
|
||||||
|
tlocktypep DNS__DB_FLARG_PASS);
|
||||||
|
}
|
||||||
|
|
||||||
|
ISC_SIEVE_INSERT(qpdb->buckets[idx].sieve, newheader, link);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
qpcache_hit(qpcache_t *qpdb ISC_ATTR_UNUSED, dns_slabheader_t *header) {
|
||||||
|
/*
|
||||||
|
* On cache hit, we only mark the header as seen.
|
||||||
|
*/
|
||||||
|
ISC_SIEVE_MARK(header, visited);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* DB Routines
|
* DB Routines
|
||||||
*/
|
*/
|
||||||
@@ -733,9 +679,6 @@ qpcnode_acquire(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t nlocktype,
|
|||||||
tlocktype DNS__DB_FLARG_PASS);
|
tlocktype DNS__DB_FLARG_PASS);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
cleanup_deadnodes(void *arg);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Decrement the external references to a node. If the counter
|
* Decrement the external references to a node. If the counter
|
||||||
* goes to zero, decrement the node use counter in the qpcache object
|
* goes to zero, decrement the node use counter in the qpcache object
|
||||||
@@ -838,7 +781,8 @@ qpcnode_release(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t *nlocktypep,
|
|||||||
isc_loop_t *loop = isc_loop_get(qpdb->loopmgr,
|
isc_loop_t *loop = isc_loop_get(qpdb->loopmgr,
|
||||||
node->locknum);
|
node->locknum);
|
||||||
|
|
||||||
isc_async_run(loop, cleanup_deadnodes, qpdb);
|
qpcache_ref(qpdb);
|
||||||
|
isc_async_run(loop, cleanup_deadnodes_cb, qpdb);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -921,13 +865,6 @@ setttl(dns_slabheader_t *header, isc_stdtime_t newts) {
|
|||||||
|
|
||||||
header->expire = newts;
|
header->expire = newts;
|
||||||
|
|
||||||
if (header->db == NULL || !dns_db_iscache(header->db)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a cache. Adjust the heaps if necessary.
|
|
||||||
*/
|
|
||||||
if (header->heap == NULL || header->heap_index == 0 || newts == oldts) {
|
if (header->heap == NULL || header->heap_index == 0 || newts == oldts) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1130,9 +1067,11 @@ bindrdatasets(qpcache_t *qpdb, qpcnode_t *qpnode, dns_slabheader_t *found,
|
|||||||
dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
|
dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
|
||||||
bindrdataset(qpdb, qpnode, found, now, nlocktype, tlocktype,
|
bindrdataset(qpdb, qpnode, found, now, nlocktype, tlocktype,
|
||||||
rdataset DNS__DB_FLARG_PASS);
|
rdataset DNS__DB_FLARG_PASS);
|
||||||
|
qpcache_hit(qpdb, found);
|
||||||
if (!NEGATIVE(found) && foundsig != NULL) {
|
if (!NEGATIVE(found) && foundsig != NULL) {
|
||||||
bindrdataset(qpdb, qpnode, foundsig, now, nlocktype, tlocktype,
|
bindrdataset(qpdb, qpnode, foundsig, now, nlocktype, tlocktype,
|
||||||
sigrdataset DNS__DB_FLARG_PASS);
|
sigrdataset DNS__DB_FLARG_PASS);
|
||||||
|
qpcache_hit(qpdb, foundsig);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1172,9 +1111,6 @@ setup_delegation(qpc_search_t *search, dns_dbnode_t **nodep,
|
|||||||
search->zonecut_sigheader, search->now, nlocktype,
|
search->zonecut_sigheader, search->now, nlocktype,
|
||||||
tlocktype, rdataset,
|
tlocktype, rdataset,
|
||||||
sigrdataset DNS__DB_FLARG_PASS);
|
sigrdataset DNS__DB_FLARG_PASS);
|
||||||
maybe_update_headers(search->qpdb, search->zonecut_header,
|
|
||||||
search->zonecut_sigheader, nlock,
|
|
||||||
&nlocktype, search->now);
|
|
||||||
NODE_UNLOCK(nlock, &nlocktype);
|
NODE_UNLOCK(nlock, &nlocktype);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1411,8 +1347,6 @@ find_deepest_zonecut(qpc_search_t *search, qpcnode_t *node,
|
|||||||
search->now, nlocktype,
|
search->now, nlocktype,
|
||||||
isc_rwlocktype_none, rdataset,
|
isc_rwlocktype_none, rdataset,
|
||||||
sigrdataset DNS__DB_FLARG_PASS);
|
sigrdataset DNS__DB_FLARG_PASS);
|
||||||
maybe_update_headers(search->qpdb, found, foundsig,
|
|
||||||
nlock, &nlocktype, search->now);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NODE_UNLOCK(nlock, &nlocktype);
|
NODE_UNLOCK(nlock, &nlocktype);
|
||||||
@@ -1505,8 +1439,6 @@ find_coveringnsec(qpc_search_t *search, const dns_name_t *name,
|
|||||||
bindrdatasets(search->qpdb, node, found, foundsig, search->now,
|
bindrdatasets(search->qpdb, node, found, foundsig, search->now,
|
||||||
nlocktype, isc_rwlocktype_none, rdataset,
|
nlocktype, isc_rwlocktype_none, rdataset,
|
||||||
sigrdataset DNS__DB_FLARG_PASS);
|
sigrdataset DNS__DB_FLARG_PASS);
|
||||||
maybe_update_headers(search->qpdb, found, foundsig, nlock,
|
|
||||||
&nlocktype, search->now);
|
|
||||||
dns_name_copy(fname, foundname);
|
dns_name_copy(fname, foundname);
|
||||||
|
|
||||||
result = DNS_R_COVERINGNSEC;
|
result = DNS_R_COVERINGNSEC;
|
||||||
@@ -1796,8 +1728,6 @@ qpcache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
|
|||||||
bindrdatasets(search.qpdb, node, nsecheader, nsecsig,
|
bindrdatasets(search.qpdb, node, nsecheader, nsecsig,
|
||||||
search.now, nlocktype, tlocktype,
|
search.now, nlocktype, tlocktype,
|
||||||
rdataset, sigrdataset DNS__DB_FLARG_PASS);
|
rdataset, sigrdataset DNS__DB_FLARG_PASS);
|
||||||
maybe_update_headers(search.qpdb, nsecheader, nsecsig,
|
|
||||||
nlock, &nlocktype, search.now);
|
|
||||||
result = DNS_R_COVERINGNSEC;
|
result = DNS_R_COVERINGNSEC;
|
||||||
goto node_exit;
|
goto node_exit;
|
||||||
}
|
}
|
||||||
@@ -1831,8 +1761,6 @@ qpcache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
|
|||||||
bindrdatasets(search.qpdb, node, nsheader, nssig,
|
bindrdatasets(search.qpdb, node, nsheader, nssig,
|
||||||
search.now, nlocktype, tlocktype,
|
search.now, nlocktype, tlocktype,
|
||||||
rdataset, sigrdataset DNS__DB_FLARG_PASS);
|
rdataset, sigrdataset DNS__DB_FLARG_PASS);
|
||||||
maybe_update_headers(search.qpdb, nsheader, nssig,
|
|
||||||
nlock, &nlocktype, search.now);
|
|
||||||
result = DNS_R_DELEGATION;
|
result = DNS_R_DELEGATION;
|
||||||
goto node_exit;
|
goto node_exit;
|
||||||
}
|
}
|
||||||
@@ -1885,8 +1813,6 @@ qpcache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
|
|||||||
bindrdatasets(search.qpdb, node, found, foundsig, search.now,
|
bindrdatasets(search.qpdb, node, found, foundsig, search.now,
|
||||||
nlocktype, tlocktype, rdataset,
|
nlocktype, tlocktype, rdataset,
|
||||||
sigrdataset DNS__DB_FLARG_PASS);
|
sigrdataset DNS__DB_FLARG_PASS);
|
||||||
maybe_update_headers(search.qpdb, found, foundsig, nlock,
|
|
||||||
&nlocktype, search.now);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
node_exit:
|
node_exit:
|
||||||
@@ -1978,8 +1904,6 @@ seek_ns_headers(qpc_search_t *search, qpcnode_t *node, dns_dbnode_t **nodep,
|
|||||||
bindrdatasets(search->qpdb, node, found, foundsig, search->now,
|
bindrdatasets(search->qpdb, node, found, foundsig, search->now,
|
||||||
nlocktype, *tlocktype, rdataset,
|
nlocktype, *tlocktype, rdataset,
|
||||||
sigrdataset DNS__DB_FLARG_PASS);
|
sigrdataset DNS__DB_FLARG_PASS);
|
||||||
maybe_update_headers(search->qpdb, found, foundsig, nlock, &nlocktype,
|
|
||||||
search->now);
|
|
||||||
|
|
||||||
NODE_UNLOCK(nlock, &nlocktype);
|
NODE_UNLOCK(nlock, &nlocktype);
|
||||||
|
|
||||||
@@ -2115,8 +2039,6 @@ qpcache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
|||||||
bindrdatasets(qpdb, qpnode, found, foundsig, search.now,
|
bindrdatasets(qpdb, qpnode, found, foundsig, search.now,
|
||||||
nlocktype, isc_rwlocktype_none, rdataset,
|
nlocktype, isc_rwlocktype_none, rdataset,
|
||||||
sigrdataset DNS__DB_FLARG_PASS);
|
sigrdataset DNS__DB_FLARG_PASS);
|
||||||
maybe_update_headers(qpdb, found, foundsig, nlock, &nlocktype,
|
|
||||||
search.now);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NODE_UNLOCK(nlock, &nlocktype);
|
NODE_UNLOCK(nlock, &nlocktype);
|
||||||
@@ -2219,114 +2141,6 @@ expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) {
|
|||||||
INSIST(tlocktype == isc_rwlocktype_none);
|
INSIST(tlocktype == isc_rwlocktype_none);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t
|
|
||||||
rdataset_size(dns_slabheader_t *header) {
|
|
||||||
if (EXISTS(header)) {
|
|
||||||
return dns_rdataslab_size(header);
|
|
||||||
}
|
|
||||||
|
|
||||||
return sizeof(*header);
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
expire_lru_headers(qpcache_t *qpdb, unsigned int locknum,
|
|
||||||
isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
|
|
||||||
size_t purgesize DNS__DB_FLARG) {
|
|
||||||
dns_slabheader_t *header = NULL;
|
|
||||||
size_t purged = 0;
|
|
||||||
|
|
||||||
for (header = ISC_LIST_TAIL(qpdb->buckets[locknum].lru);
|
|
||||||
header != NULL && header->last_used <= qpdb->last_used &&
|
|
||||||
purged <= purgesize;
|
|
||||||
header = ISC_LIST_TAIL(qpdb->buckets[locknum].lru))
|
|
||||||
{
|
|
||||||
size_t header_size = rdataset_size(header);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Unlink the entry at this point to avoid checking it
|
|
||||||
* again even if it's currently used by someone else and
|
|
||||||
* cannot be purged at this moment. This entry won't be
|
|
||||||
* referenced any more (so unlinking is safe) since the
|
|
||||||
* TTL will be reset to 0.
|
|
||||||
*/
|
|
||||||
ISC_LIST_UNLINK(qpdb->buckets[locknum].lru, header, link);
|
|
||||||
expireheader(header, nlocktypep, tlocktypep,
|
|
||||||
dns_expire_lru DNS__DB_FLARG_PASS);
|
|
||||||
purged += header_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
return purged;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*%
|
|
||||||
* Purge some expired and/or stale (i.e. unused for some period) cache entries
|
|
||||||
* due to an overmem condition. To recover from this condition quickly,
|
|
||||||
* we clean up entries up to the size of newly added rdata that triggered
|
|
||||||
* the overmem; this is accessible via newheader.
|
|
||||||
*
|
|
||||||
* The LRU lists tails are processed in LRU order to the nearest second.
|
|
||||||
*
|
|
||||||
* A write lock on the tree must be held.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
overmem(qpcache_t *qpdb, dns_slabheader_t *newheader,
|
|
||||||
isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
|
|
||||||
uint32_t locknum_start = qpdb->lru_sweep++ % qpdb->buckets_count;
|
|
||||||
uint32_t locknum = locknum_start;
|
|
||||||
size_t purgesize, purged = 0;
|
|
||||||
isc_stdtime_t min_last_used = 0;
|
|
||||||
size_t max_passes = 8;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Maximum estimated size of the data being added: The size
|
|
||||||
* of the rdataset, plus a new QP database node and nodename,
|
|
||||||
* and a possible additional NSEC node and nodename. Also add
|
|
||||||
* a 12k margin for a possible QP-trie chunk allocation.
|
|
||||||
* (It's okay to overestimate, we want to get cache memory
|
|
||||||
* down quickly.)
|
|
||||||
*/
|
|
||||||
purgesize = 2 * (sizeof(qpcnode_t) +
|
|
||||||
dns_name_size(&HEADERNODE(newheader)->name)) +
|
|
||||||
rdataset_size(newheader) + 12288;
|
|
||||||
again:
|
|
||||||
do {
|
|
||||||
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
|
|
||||||
isc_rwlock_t *nlock = &qpdb->buckets[locknum].lock;
|
|
||||||
NODE_WRLOCK(nlock, &nlocktype);
|
|
||||||
|
|
||||||
purged += expire_lru_headers(
|
|
||||||
qpdb, locknum, &nlocktype, tlocktypep,
|
|
||||||
purgesize - purged DNS__DB_FLARG_PASS);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Work out the oldest remaining last_used values of the list
|
|
||||||
* tails as we walk across the array of lru lists.
|
|
||||||
*/
|
|
||||||
dns_slabheader_t *header =
|
|
||||||
ISC_LIST_TAIL(qpdb->buckets[locknum].lru);
|
|
||||||
if (header != NULL &&
|
|
||||||
(min_last_used == 0 || header->last_used < min_last_used))
|
|
||||||
{
|
|
||||||
min_last_used = header->last_used;
|
|
||||||
}
|
|
||||||
NODE_UNLOCK(nlock, &nlocktype);
|
|
||||||
locknum = (locknum + 1) % qpdb->buckets_count;
|
|
||||||
} while (locknum != locknum_start && purged <= purgesize);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Update qpdb->last_used if we have walked all the list tails and have
|
|
||||||
* not freed the required amount of memory.
|
|
||||||
*/
|
|
||||||
if (purged < purgesize) {
|
|
||||||
if (min_last_used != 0) {
|
|
||||||
qpdb->last_used = min_last_used;
|
|
||||||
if (max_passes-- > 0) {
|
|
||||||
goto again;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*%
|
/*%
|
||||||
* These functions allow the heap code to rank the priority of each
|
* These functions allow the heap code to rank the priority of each
|
||||||
* element. It returns true if v1 happens "sooner" than v2.
|
* element. It returns true if v1 happens "sooner" than v2.
|
||||||
@@ -2385,7 +2199,7 @@ qpcache__destroy(qpcache_t *qpdb) {
|
|||||||
for (i = 0; i < qpdb->buckets_count; i++) {
|
for (i = 0; i < qpdb->buckets_count; i++) {
|
||||||
NODE_DESTROYLOCK(&qpdb->buckets[i].lock);
|
NODE_DESTROYLOCK(&qpdb->buckets[i].lock);
|
||||||
|
|
||||||
INSIST(ISC_LIST_EMPTY(qpdb->buckets[i].lru));
|
INSIST(ISC_SIEVE_EMPTY(qpdb->buckets[i].sieve));
|
||||||
|
|
||||||
INSIST(isc_queue_empty(&qpdb->buckets[i].deadnodes));
|
INSIST(isc_queue_empty(&qpdb->buckets[i].deadnodes));
|
||||||
isc_queue_destroy(&qpdb->buckets[i].deadnodes);
|
isc_queue_destroy(&qpdb->buckets[i].deadnodes);
|
||||||
@@ -2428,9 +2242,7 @@ qpcache_destroy(dns_db_t *arg) {
|
|||||||
* to wait for the tree write lock.
|
* to wait for the tree write lock.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
cleanup_deadnodes(void *arg) {
|
cleanup_deadnodes(qpcache_t *qpdb, uint16_t locknum) {
|
||||||
qpcache_t *qpdb = arg;
|
|
||||||
uint16_t locknum = isc_tid();
|
|
||||||
isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
|
isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
|
||||||
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
|
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
|
||||||
isc_rwlock_t *nlock = &qpdb->buckets[locknum].lock;
|
isc_rwlock_t *nlock = &qpdb->buckets[locknum].lock;
|
||||||
@@ -2444,8 +2256,7 @@ cleanup_deadnodes(void *arg) {
|
|||||||
TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
|
TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
|
||||||
NODE_WRLOCK(nlock, &nlocktype);
|
NODE_WRLOCK(nlock, &nlocktype);
|
||||||
|
|
||||||
RUNTIME_CHECK(isc_queue_splice(&deadnodes,
|
isc_queue_splice(&deadnodes, &qpdb->buckets[locknum].deadnodes);
|
||||||
&qpdb->buckets[locknum].deadnodes));
|
|
||||||
isc_queue_for_each_entry_safe(&deadnodes, qpnode, qpnext, deadlink) {
|
isc_queue_for_each_entry_safe(&deadnodes, qpnode, qpnext, deadlink) {
|
||||||
qpcnode_release(qpdb, qpnode, &nlocktype,
|
qpcnode_release(qpdb, qpnode, &nlocktype,
|
||||||
&tlocktype DNS__DB_FILELINE);
|
&tlocktype DNS__DB_FILELINE);
|
||||||
@@ -2455,6 +2266,14 @@ cleanup_deadnodes(void *arg) {
|
|||||||
TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
|
TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
cleanup_deadnodes_cb(void *arg) {
|
||||||
|
qpcache_t *qpdb = arg;
|
||||||
|
uint16_t locknum = isc_tid();
|
||||||
|
|
||||||
|
cleanup_deadnodes(qpdb, locknum);
|
||||||
|
qpcache_unref(qpdb);
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* This function is assumed to be called when a node is newly referenced
|
* This function is assumed to be called when a node is newly referenced
|
||||||
* and can be in the deadnode list. In that case the node will be referenced
|
* and can be in the deadnode list. In that case the node will be referenced
|
||||||
@@ -2657,7 +2476,6 @@ add(qpcache_t *qpdb, qpcnode_t *qpnode,
|
|||||||
dns_slabheader_t *prioheader = NULL, *expireheader = NULL;
|
dns_slabheader_t *prioheader = NULL, *expireheader = NULL;
|
||||||
dns_typepair_t negtype = 0;
|
dns_typepair_t negtype = 0;
|
||||||
dns_trust_t trust;
|
dns_trust_t trust;
|
||||||
int idx;
|
|
||||||
uint32_t ntypes = 0;
|
uint32_t ntypes = 0;
|
||||||
|
|
||||||
if ((options & DNS_DBADD_FORCE) != 0) {
|
if ((options & DNS_DBADD_FORCE) != 0) {
|
||||||
@@ -2833,17 +2651,9 @@ find_header:
|
|||||||
if (header->expire > newheader->expire) {
|
if (header->expire > newheader->expire) {
|
||||||
setttl(header, newheader->expire);
|
setttl(header, newheader->expire);
|
||||||
}
|
}
|
||||||
if (header->last_used != now) {
|
|
||||||
ISC_LIST_UNLINK(
|
qpcache_hit(qpdb, header);
|
||||||
qpdb->buckets[HEADERNODE(header)->locknum]
|
|
||||||
.lru,
|
|
||||||
header, link);
|
|
||||||
header->last_used = now;
|
|
||||||
ISC_LIST_PREPEND(
|
|
||||||
qpdb->buckets[HEADERNODE(header)->locknum]
|
|
||||||
.lru,
|
|
||||||
header, link);
|
|
||||||
}
|
|
||||||
if (header->noqname == NULL &&
|
if (header->noqname == NULL &&
|
||||||
newheader->noqname != NULL)
|
newheader->noqname != NULL)
|
||||||
{
|
{
|
||||||
@@ -2895,17 +2705,9 @@ find_header:
|
|||||||
if (header->expire > newheader->expire) {
|
if (header->expire > newheader->expire) {
|
||||||
setttl(header, newheader->expire);
|
setttl(header, newheader->expire);
|
||||||
}
|
}
|
||||||
if (header->last_used != now) {
|
|
||||||
ISC_LIST_UNLINK(
|
qpcache_hit(qpdb, header);
|
||||||
qpdb->buckets[HEADERNODE(header)->locknum]
|
|
||||||
.lru,
|
|
||||||
header, link);
|
|
||||||
header->last_used = now;
|
|
||||||
ISC_LIST_PREPEND(
|
|
||||||
qpdb->buckets[HEADERNODE(header)->locknum]
|
|
||||||
.lru,
|
|
||||||
header, link);
|
|
||||||
}
|
|
||||||
if (header->noqname == NULL &&
|
if (header->noqname == NULL &&
|
||||||
newheader->noqname != NULL)
|
newheader->noqname != NULL)
|
||||||
{
|
{
|
||||||
@@ -2927,17 +2729,9 @@ find_header:
|
|||||||
return ISC_R_SUCCESS;
|
return ISC_R_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
idx = HEADERNODE(newheader)->locknum;
|
qpcache_miss(qpdb, newheader, &nlocktype,
|
||||||
isc_heap_insert(qpdb->buckets[idx].heap, newheader);
|
&tlocktype DNS__DB_FLARG_PASS);
|
||||||
newheader->heap = qpdb->buckets[idx].heap;
|
|
||||||
if (ZEROTTL(newheader)) {
|
|
||||||
newheader->last_used = qpdb->last_used + 1;
|
|
||||||
ISC_LIST_APPEND(qpdb->buckets[idx].lru, newheader,
|
|
||||||
link);
|
|
||||||
} else {
|
|
||||||
ISC_LIST_PREPEND(qpdb->buckets[idx].lru, newheader,
|
|
||||||
link);
|
|
||||||
}
|
|
||||||
if (topheader_prev != NULL) {
|
if (topheader_prev != NULL) {
|
||||||
topheader_prev->next = newheader;
|
topheader_prev->next = newheader;
|
||||||
} else {
|
} else {
|
||||||
@@ -2961,17 +2755,8 @@ find_header:
|
|||||||
/* No rdatasets of the given type exist at the node. */
|
/* No rdatasets of the given type exist at the node. */
|
||||||
INSIST(newheader->down == NULL);
|
INSIST(newheader->down == NULL);
|
||||||
|
|
||||||
idx = HEADERNODE(newheader)->locknum;
|
qpcache_miss(qpdb, newheader, &nlocktype,
|
||||||
isc_heap_insert(qpdb->buckets[idx].heap, newheader);
|
&tlocktype DNS__DB_FLARG_PASS);
|
||||||
newheader->heap = qpdb->buckets[idx].heap;
|
|
||||||
if (ZEROTTL(newheader)) {
|
|
||||||
ISC_LIST_APPEND(qpdb->buckets[idx].lru, newheader,
|
|
||||||
link);
|
|
||||||
} else {
|
|
||||||
ISC_LIST_PREPEND(qpdb->buckets[idx].lru, newheader,
|
|
||||||
link);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (prio_header(newheader)) {
|
if (prio_header(newheader)) {
|
||||||
/* This is a priority type, prepend it */
|
/* This is a priority type, prepend it */
|
||||||
newheader->next = qpnode->data;
|
newheader->next = qpnode->data;
|
||||||
@@ -3097,7 +2882,7 @@ cleanup:
|
|||||||
static void
|
static void
|
||||||
expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
|
expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
|
||||||
isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
|
isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
|
||||||
isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG);
|
isc_stdtime_t now DNS__DB_FLARG);
|
||||||
|
|
||||||
static isc_result_t
|
static isc_result_t
|
||||||
qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
||||||
@@ -3114,7 +2899,6 @@ qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
|||||||
isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
|
isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
|
||||||
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
|
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
|
||||||
isc_rwlock_t *nlock = NULL;
|
isc_rwlock_t *nlock = NULL;
|
||||||
bool cache_is_overmem = false;
|
|
||||||
dns_fixedname_t fixed;
|
dns_fixedname_t fixed;
|
||||||
dns_name_t *name = NULL;
|
dns_name_t *name = NULL;
|
||||||
isc_stdtime_t now = __now ? __now : isc_stdtime_now();
|
isc_stdtime_t now = __now ? __now : isc_stdtime_now();
|
||||||
@@ -3140,8 +2924,6 @@ qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
|||||||
newheader = (dns_slabheader_t *)region.base;
|
newheader = (dns_slabheader_t *)region.base;
|
||||||
dns_slabheader_reset(newheader, db, node);
|
dns_slabheader_reset(newheader, db, node);
|
||||||
|
|
||||||
newheader->last_used = now;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* By default, dns_rdataslab_fromrdataset() sets newheader->ttl
|
* By default, dns_rdataslab_fromrdataset() sets newheader->ttl
|
||||||
* to the rdataset TTL. In the case of the cache, that's wrong;
|
* to the rdataset TTL. In the case of the cache, that's wrong;
|
||||||
@@ -3195,34 +2977,17 @@ qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
|||||||
/*
|
/*
|
||||||
* Add to the auxiliary NSEC tree if we're adding an NSEC record.
|
* Add to the auxiliary NSEC tree if we're adding an NSEC record.
|
||||||
*/
|
*/
|
||||||
TREE_RDLOCK(&qpdb->tree_lock, &tlocktype);
|
newnsec = (qpnode->nsec != DNS_DB_NSEC_HAS_NSEC &&
|
||||||
if (qpnode->nsec != DNS_DB_NSEC_HAS_NSEC &&
|
rdataset->type == dns_rdatatype_nsec);
|
||||||
rdataset->type == dns_rdatatype_nsec)
|
|
||||||
{
|
|
||||||
newnsec = true;
|
|
||||||
} else {
|
|
||||||
newnsec = false;
|
|
||||||
}
|
|
||||||
TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we're adding a delegation type, adding to the auxiliary NSEC
|
* If we're adding a delegation type or adding to the auxiliary
|
||||||
* tree, or the DB is a cache in an overmem state, hold an
|
* NSEC tree, hold an exclusive lock on the tree.
|
||||||
* exclusive lock on the tree. In the latter case the lock does
|
|
||||||
* not necessarily have to be acquired but it will help purge
|
|
||||||
* ancient entries more effectively.
|
|
||||||
*/
|
*/
|
||||||
if (isc_mem_isovermem(qpdb->common.mctx)) {
|
if (delegating || newnsec) {
|
||||||
cache_is_overmem = true;
|
|
||||||
}
|
|
||||||
if (delegating || newnsec || cache_is_overmem) {
|
|
||||||
TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
|
TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cache_is_overmem) {
|
|
||||||
overmem(qpdb, newheader, &tlocktype DNS__DB_FLARG_PASS);
|
|
||||||
}
|
|
||||||
|
|
||||||
nlock = &qpdb->buckets[qpnode->locknum].lock;
|
nlock = &qpdb->buckets[qpnode->locknum].lock;
|
||||||
|
|
||||||
NODE_WRLOCK(nlock, &nlocktype);
|
NODE_WRLOCK(nlock, &nlocktype);
|
||||||
@@ -3234,27 +2999,15 @@ qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
|||||||
true);
|
true);
|
||||||
}
|
}
|
||||||
|
|
||||||
expire_ttl_headers(qpdb, qpnode->locknum, &nlocktype, &tlocktype, now,
|
expire_ttl_headers(qpdb, qpnode->locknum, &nlocktype, &tlocktype,
|
||||||
cache_is_overmem DNS__DB_FLARG_PASS);
|
now DNS__DB_FLARG_PASS);
|
||||||
|
|
||||||
/*
|
|
||||||
* If we've been holding a write lock on the tree just for
|
|
||||||
* cleaning, we can release it now. However, we still need the
|
|
||||||
* node lock.
|
|
||||||
*/
|
|
||||||
if (tlocktype == isc_rwlocktype_write && !delegating && !newnsec) {
|
|
||||||
TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
|
|
||||||
}
|
|
||||||
|
|
||||||
result = ISC_R_SUCCESS;
|
|
||||||
if (newnsec) {
|
if (newnsec) {
|
||||||
qpcnode_t *nsecnode = NULL;
|
qpcnode_t *nsecnode = NULL;
|
||||||
|
|
||||||
result = dns_qp_getname(qpdb->nsec, name, (void **)&nsecnode,
|
result = dns_qp_getname(qpdb->nsec, name, (void **)&nsecnode,
|
||||||
NULL);
|
NULL);
|
||||||
if (result == ISC_R_SUCCESS) {
|
if (result != ISC_R_SUCCESS) {
|
||||||
result = ISC_R_SUCCESS;
|
|
||||||
} else {
|
|
||||||
INSIST(nsecnode == NULL);
|
INSIST(nsecnode == NULL);
|
||||||
nsecnode = new_qpcnode(qpdb, name);
|
nsecnode = new_qpcnode(qpdb, name);
|
||||||
nsecnode->nsec = DNS_DB_NSEC_NSEC;
|
nsecnode->nsec = DNS_DB_NSEC_NSEC;
|
||||||
@@ -3265,11 +3018,9 @@ qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
|||||||
qpnode->nsec = DNS_DB_NSEC_HAS_NSEC;
|
qpnode->nsec = DNS_DB_NSEC_HAS_NSEC;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result == ISC_R_SUCCESS) {
|
result = add(qpdb, qpnode, name, newheader, options, addedrdataset, now,
|
||||||
result = add(qpdb, qpnode, name, newheader, options,
|
nlocktype, tlocktype DNS__DB_FLARG_PASS);
|
||||||
addedrdataset, now, nlocktype,
|
|
||||||
tlocktype DNS__DB_FLARG_PASS);
|
|
||||||
}
|
|
||||||
if (result == ISC_R_SUCCESS && delegating) {
|
if (result == ISC_R_SUCCESS && delegating) {
|
||||||
qpnode->delegating = 1;
|
qpnode->delegating = 1;
|
||||||
}
|
}
|
||||||
@@ -3279,6 +3030,7 @@ qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
|
|||||||
if (tlocktype != isc_rwlocktype_none) {
|
if (tlocktype != isc_rwlocktype_none) {
|
||||||
TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
|
TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
|
||||||
}
|
}
|
||||||
|
|
||||||
INSIST(tlocktype == isc_rwlocktype_none);
|
INSIST(tlocktype == isc_rwlocktype_none);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@@ -3402,7 +3154,7 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
|
|||||||
|
|
||||||
dns_rdatasetstats_create(mctx, &qpdb->rrsetstats);
|
dns_rdatasetstats_create(mctx, &qpdb->rrsetstats);
|
||||||
for (i = 0; i < (int)qpdb->buckets_count; i++) {
|
for (i = 0; i < (int)qpdb->buckets_count; i++) {
|
||||||
ISC_LIST_INIT(qpdb->buckets[i].lru);
|
ISC_SIEVE_INIT(qpdb->buckets[i].sieve);
|
||||||
|
|
||||||
qpdb->buckets[i].heap = NULL;
|
qpdb->buckets[i].heap = NULL;
|
||||||
isc_heap_create(hmctx, ttl_sooner, set_index, 0,
|
isc_heap_create(hmctx, ttl_sooner, set_index, 0,
|
||||||
@@ -3911,6 +3663,7 @@ deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED,
|
|||||||
void *data) {
|
void *data) {
|
||||||
dns_slabheader_t *header = data;
|
dns_slabheader_t *header = data;
|
||||||
qpcache_t *qpdb = (qpcache_t *)header->db;
|
qpcache_t *qpdb = (qpcache_t *)header->db;
|
||||||
|
int idx = HEADERNODE(header)->locknum;
|
||||||
|
|
||||||
if (header->heap != NULL && header->heap_index != 0) {
|
if (header->heap != NULL && header->heap_index != 0) {
|
||||||
isc_heap_delete(header->heap, header->heap_index);
|
isc_heap_delete(header->heap, header->heap_index);
|
||||||
@@ -3920,8 +3673,7 @@ deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED,
|
|||||||
atomic_load_acquire(&header->attributes), false);
|
atomic_load_acquire(&header->attributes), false);
|
||||||
|
|
||||||
if (ISC_LINK_LINKED(header, link)) {
|
if (ISC_LINK_LINKED(header, link)) {
|
||||||
int idx = HEADERNODE(header)->locknum;
|
ISC_SIEVE_UNLINK(qpdb->buckets[idx].sieve, header, link);
|
||||||
ISC_LIST_UNLINK(qpdb->buckets[idx].lru, header, link);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (header->noqname != NULL) {
|
if (header->noqname != NULL) {
|
||||||
@@ -3938,7 +3690,7 @@ deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED,
|
|||||||
static void
|
static void
|
||||||
expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
|
expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
|
||||||
isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
|
isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
|
||||||
isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG) {
|
isc_stdtime_t now DNS__DB_FLARG) {
|
||||||
isc_heap_t *heap = qpdb->buckets[locknum].heap;
|
isc_heap_t *heap = qpdb->buckets[locknum].heap;
|
||||||
|
|
||||||
for (size_t i = 0; i < DNS_QPDB_EXPIRE_TTL_COUNT; i++) {
|
for (size_t i = 0; i < DNS_QPDB_EXPIRE_TTL_COUNT; i++) {
|
||||||
@@ -3949,12 +3701,7 @@ expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
dns_ttl_t ttl = header->expire;
|
dns_ttl_t ttl = header->expire + STALE_TTL(header, qpdb);
|
||||||
|
|
||||||
if (!cache_is_overmem) {
|
|
||||||
/* Only account for stale TTL if cache is not overmem */
|
|
||||||
ttl += STALE_TTL(header, qpdb);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ttl >= now - QPDB_VIRTUAL) {
|
if (ttl >= now - QPDB_VIRTUAL) {
|
||||||
/*
|
/*
|
||||||
|
@@ -865,6 +865,7 @@ dns_slabheader_reset(dns_slabheader_t *h, dns_db_t *db, dns_dbnode_t *node) {
|
|||||||
h->heap = NULL;
|
h->heap = NULL;
|
||||||
h->db = db;
|
h->db = db;
|
||||||
h->node = node;
|
h->node = node;
|
||||||
|
h->visited = false;
|
||||||
|
|
||||||
atomic_init(&h->attributes, 0);
|
atomic_init(&h->attributes, 0);
|
||||||
atomic_init(&h->last_refresh_fail_ts, 0);
|
atomic_init(&h->last_refresh_fail_ts, 0);
|
||||||
|
@@ -75,6 +75,7 @@ libisc_la_HEADERS = \
|
|||||||
include/isc/rwlock.h \
|
include/isc/rwlock.h \
|
||||||
include/isc/safe.h \
|
include/isc/safe.h \
|
||||||
include/isc/serial.h \
|
include/isc/serial.h \
|
||||||
|
include/isc/sieve.h \
|
||||||
include/isc/signal.h \
|
include/isc/signal.h \
|
||||||
include/isc/siphash.h \
|
include/isc/siphash.h \
|
||||||
include/isc/sockaddr.h \
|
include/isc/sockaddr.h \
|
||||||
|
166
lib/isc/include/isc/sieve.h
Normal file
166
lib/isc/include/isc/sieve.h
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MPL-2.0
|
||||||
|
*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* See the COPYRIGHT file distributed with this work for additional
|
||||||
|
* information regarding copyright ownership.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/*! \file isc/sieve.h */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Zhang, Yazhuo, Juncheng Yang, Yao Yue, Ymir Vigfusson, and K V Rashmi.
|
||||||
|
* “SIEVE Is Simpler than LRU: An Efficient Turn-Key Eviction Algorithm for
|
||||||
|
* Web Caches,” n.d.
|
||||||
|
*
|
||||||
|
* Algorithm 1 SIEVE
|
||||||
|
*
|
||||||
|
* Input: The request x, doubly-linked queue T, cache size C, hand p
|
||||||
|
* 1: if x is in T then ▷ Cache Hit
|
||||||
|
* 2: x.visited ←1
|
||||||
|
* 3: else ▷ Cache Miss
|
||||||
|
* 4: if |T| = C then ▷ Cache Full
|
||||||
|
* 5: o ←p
|
||||||
|
* 6: if o is NULL then
|
||||||
|
* 7: o ←tail of T
|
||||||
|
* 8: while o.visited = 1 do
|
||||||
|
* 9: o.visited ←0
|
||||||
|
* 10: o ←o.prev
|
||||||
|
* 11: if o is NULL then
|
||||||
|
* 12: o ←tail of T
|
||||||
|
* 13: p ←o.prev
|
||||||
|
* 14: Discard o in T ▷ Eviction
|
||||||
|
* 15: Insert x in the head of T.
|
||||||
|
* 16: x.visited ←0 ▷ Insertion
|
||||||
|
*
|
||||||
|
* Data structure. SIEVE requires only one FIFO queue and one pointer
|
||||||
|
* called “hand”. The queue maintains the insertion order between objects.
|
||||||
|
* Each object in the queue uses one bit to track the visited/non-visited
|
||||||
|
* status. The hand points to the next eviction candidate in the cache and
|
||||||
|
* moves from the tail to the head. Note that, unlike existing algorithms,
|
||||||
|
* e.g., LRU, FIFO, and CLOCK, in which the eviction candidate is always
|
||||||
|
* the tail object, the eviction candidate in SIEVE is an object somewhere
|
||||||
|
* in the queue.
|
||||||
|
*
|
||||||
|
* SIEVE operations. A cache hit in SIEVE changes the visited bit of the
|
||||||
|
* accessed object to 1. For a popular object whose visited bit is already
|
||||||
|
* 1, SIEVE does not need to perform any operation. During a cache miss,
|
||||||
|
* SIEVE examines the object pointed by the hand. If it has been visited,
|
||||||
|
* the visited bit is reset, and the hand moves to the next position (the
|
||||||
|
* retained object stays in the original position of the queue). It
|
||||||
|
* continues this process until it encounters an object with the visited
|
||||||
|
* bit being 0, and it evicts the object. After the eviction, the hand
|
||||||
|
* points to the next position (the previous object in the queue). While
|
||||||
|
* an evicted object is in the middle of the queue most of the time, a new
|
||||||
|
* object is always inserted into the head of the queue. In other words,
|
||||||
|
* the new objects and the retained objects are not mixed together.
|
||||||
|
*
|
||||||
|
* At first glance, SIEVE is similar to CLOCK/Second Chance/FIFO-Reinsertion.
|
||||||
|
* Each algorithm maintains a single queue in which each object is
|
||||||
|
* associated with a visited bit to track its access status. Visited
|
||||||
|
* objects are retained (also called "survived") during an eviction.
|
||||||
|
* Notably, new objects are inserted at the head of the queue in both SIEVE
|
||||||
|
* and FIFO-Reinsertion. However, the hand in SIEVE moves from the tail to
|
||||||
|
* the head over time, whereas the hand in FIFO-Reinsertion stays at the
|
||||||
|
* tail. The key difference is where a retained object is kept. SIEVE
|
||||||
|
* keeps it in the old position, while FIFO-Reinsertion inserts it at the
|
||||||
|
* head, together with newly inserted objects.
|
||||||
|
*
|
||||||
|
* We detail the algorithm in Alg. 1. Line 1 checks whether there is a
|
||||||
|
* hit, and if so, then line 2 sets the visited bit to one. In the case of
|
||||||
|
* a cache miss (Line 3), Lines 5-12 identify the object to be evicted.
|
||||||
|
*
|
||||||
|
* Lazy promotion and quick demotion. Despite a simple design, SIEVE
|
||||||
|
* effectively incorporates both lazy promotion and quick demotion. An
|
||||||
|
* object is only promoted at the eviction time in lazy promotion. SIEVE
|
||||||
|
* operates in a similar manner. However, rather than promoting the object
|
||||||
|
* to the head of the queue, SIEVE keeps the object at its original
|
||||||
|
* location. The "survived" objects are generally more popular than the
|
||||||
|
* evicted ones, thus, they are likely to be accessed again in the future.
|
||||||
|
* By gathering the "survived" objects, the hand in SIEVE can quickly move
|
||||||
|
* from the tail to the area near the head, where most objects are newly
|
||||||
|
* inserted. These newly inserted objects are quickly examined by the hand
|
||||||
|
* of SIEVE after they are admitted into the cache, thus achieving quick
|
||||||
|
* demotion. This eviction mechanism makes SIEVE achieve both lazy
|
||||||
|
* promotion and quick demotion without adding too much overhead.
|
||||||
|
*
|
||||||
|
* The key ingredient of SIEVE is the moving hand, which functions like an
|
||||||
|
* adaptive filter that removes unpopular objects from the cache. This
|
||||||
|
* mechanism enables SIEVE to strike a balance between finding new popular
|
||||||
|
* objects and keeping old popular objects.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <isc/list.h>
|
||||||
|
|
||||||
|
#define ISC_SIEVE(type) \
|
||||||
|
struct { \
|
||||||
|
ISC_LIST(type) list; \
|
||||||
|
type *hand; \
|
||||||
|
}
|
||||||
|
#define ISC_SIEVE_INIT(sieve) \
|
||||||
|
{ \
|
||||||
|
ISC_LIST_INIT((sieve).list); \
|
||||||
|
(sieve).hand = NULL; \
|
||||||
|
}
|
||||||
|
#define ISC_SIEVE_EMPTY(sieve) ISC_LIST_EMPTY((sieve).list)
|
||||||
|
|
||||||
|
#define ISC_SIEVE_MARKED(entry, visited) CMM_LOAD_SHARED((entry)->visited)
|
||||||
|
#define ISC_SIEVE_MARK(entry, visited) \
|
||||||
|
if (!ISC_SIEVE_MARKED(entry, visited)) { \
|
||||||
|
CMM_STORE_SHARED((entry)->visited, true); \
|
||||||
|
}
|
||||||
|
#define ISC_SIEVE_UNMARK(entry, visited) \
|
||||||
|
CMM_STORE_SHARED((entry)->visited, false)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: To match the original algorithm design, the
|
||||||
|
* SIEVE queue is iterated from tail to head.
|
||||||
|
*/
|
||||||
|
#define ISC_SIEVE_NEXT(sieve, visited, link) \
|
||||||
|
({ \
|
||||||
|
__typeof__((sieve).hand) __hand = ((sieve).hand); \
|
||||||
|
if (__hand == NULL && !ISC_LIST_EMPTY((sieve).list)) { \
|
||||||
|
__hand = ISC_LIST_TAIL((sieve).list); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
while (__hand != NULL && ISC_SIEVE_MARKED(__hand, visited)) { \
|
||||||
|
ISC_SIEVE_UNMARK(__hand, visited); \
|
||||||
|
\
|
||||||
|
__hand = ISC_LIST_PREV(__hand, link); \
|
||||||
|
if (__hand == NULL) { \
|
||||||
|
/* We know the queue is not empty */ \
|
||||||
|
__hand = ISC_LIST_TAIL((sieve).list); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
(sieve).hand = __hand; \
|
||||||
|
__hand; \
|
||||||
|
})
|
||||||
|
|
||||||
|
#define ISC_SIEVE_UNLINK(sieve, entry, link) \
|
||||||
|
({ \
|
||||||
|
__typeof__((sieve).hand) __hand = (sieve).hand; \
|
||||||
|
/* 1. Go to the previous node (possibly head of the list) */ \
|
||||||
|
if (entry == __hand) { \
|
||||||
|
__hand = ISC_LIST_PREV(entry, link); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/* 2. Unlink the node from the list */ \
|
||||||
|
ISC_LIST_UNLINK((sieve).list, entry, link); \
|
||||||
|
\
|
||||||
|
/* 3. We reached head, continue with tail again */ \
|
||||||
|
if (__hand == NULL && !ISC_LIST_EMPTY((sieve).list)) { \
|
||||||
|
__hand = ISC_LIST_TAIL((sieve).list); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
(sieve).hand = __hand; \
|
||||||
|
})
|
||||||
|
|
||||||
|
#define ISC_SIEVE_INSERT(sieve, entry, link) \
|
||||||
|
ISC_LIST_PREPEND((sieve).list, entry, link)
|
@@ -112,6 +112,16 @@ overmempurge_addrdataset(dns_db_t *db, isc_stdtime_t now, int idx,
|
|||||||
dns_db_detachnode(db, &node);
|
dns_db_detachnode(db, &node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
cleanup_all_deadnodes(dns_db_t *db) {
|
||||||
|
qpcache_t *qpdb = (qpcache_t *)db;
|
||||||
|
qpcache_ref(qpdb);
|
||||||
|
for (uint16_t locknum = 0; locknum < qpdb->buckets_count; locknum++) {
|
||||||
|
cleanup_deadnodes(qpdb, locknum);
|
||||||
|
}
|
||||||
|
qpcache_unref(qpdb);
|
||||||
|
}
|
||||||
|
|
||||||
ISC_LOOP_TEST_IMPL(overmempurge_bigrdata) {
|
ISC_LOOP_TEST_IMPL(overmempurge_bigrdata) {
|
||||||
size_t maxcache = 2097152U; /* 2MB - same as DNS_CACHE_MINSIZE */
|
size_t maxcache = 2097152U; /* 2MB - same as DNS_CACHE_MINSIZE */
|
||||||
size_t hiwater = maxcache - (maxcache >> 3); /* borrowed from cache.c */
|
size_t hiwater = maxcache - (maxcache >> 3); /* borrowed from cache.c */
|
||||||
@@ -150,6 +160,7 @@ ISC_LOOP_TEST_IMPL(overmempurge_bigrdata) {
|
|||||||
*/
|
*/
|
||||||
while (i-- > 0) {
|
while (i-- > 0) {
|
||||||
overmempurge_addrdataset(db, now, i, 50054, 65535, false);
|
overmempurge_addrdataset(db, now, i, 50054, 65535, false);
|
||||||
|
cleanup_all_deadnodes(db);
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
print_message("# inuse: %zd max: %zd\n",
|
print_message("# inuse: %zd max: %zd\n",
|
||||||
isc_mem_inuse(mctx2), maxcache);
|
isc_mem_inuse(mctx2), maxcache);
|
||||||
@@ -200,6 +211,7 @@ ISC_LOOP_TEST_IMPL(overmempurge_longname) {
|
|||||||
*/
|
*/
|
||||||
while (i-- > 0) {
|
while (i-- > 0) {
|
||||||
overmempurge_addrdataset(db, now, i, 50054, 0, true);
|
overmempurge_addrdataset(db, now, i, 50054, 0, true);
|
||||||
|
cleanup_all_deadnodes(db);
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
print_message("# inuse: %zd max: %zd\n",
|
print_message("# inuse: %zd max: %zd\n",
|
||||||
isc_mem_inuse(mctx2), maxcache);
|
isc_mem_inuse(mctx2), maxcache);
|
||||||
|
Reference in New Issue
Block a user