aboutsummaryrefslogtreecommitdiff
path: root/external/unbound/services
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--external/unbound/services/cache/dns.c816
-rw-r--r--external/unbound/services/cache/dns.h194
-rw-r--r--external/unbound/services/cache/infra.c569
-rw-r--r--external/unbound/services/cache/infra.h309
-rw-r--r--external/unbound/services/cache/rrset.c417
-rw-r--r--external/unbound/services/cache/rrset.h231
-rw-r--r--external/unbound/services/listen_dnsport.c1062
-rw-r--r--external/unbound/services/listen_dnsport.h210
-rw-r--r--external/unbound/services/localzone.c1400
-rw-r--r--external/unbound/services/localzone.h317
-rw-r--r--external/unbound/services/mesh.c1209
-rw-r--r--external/unbound/services/mesh.h572
-rw-r--r--external/unbound/services/modstack.c215
-rw-r--r--external/unbound/services/modstack.h113
-rw-r--r--external/unbound/services/outbound_list.c89
-rw-r--r--external/unbound/services/outbound_list.h105
-rw-r--r--external/unbound/services/outside_network.c2052
-rw-r--r--external/unbound/services/outside_network.h554
18 files changed, 10434 insertions, 0 deletions
diff --git a/external/unbound/services/cache/dns.c b/external/unbound/services/cache/dns.c
new file mode 100644
index 000000000..c663b8e8b
--- /dev/null
+++ b/external/unbound/services/cache/dns.c
@@ -0,0 +1,816 @@
+/*
+ * services/cache/dns.c - Cache services for DNS using msg and rrset caches.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains the DNS cache.
+ */
+#include "config.h"
+#include "iterator/iter_delegpt.h"
+#include "validator/val_nsec.h"
+#include "services/cache/dns.h"
+#include "services/cache/rrset.h"
+#include "util/data/msgreply.h"
+#include "util/data/packed_rrset.h"
+#include "util/data/dname.h"
+#include "util/module.h"
+#include "util/net_help.h"
+#include "util/regional.h"
+#include "util/config_file.h"
+#include "ldns/sbuffer.h"
+
+/** store rrsets in the rrset cache.
+ * Each rrset of rep is offered to rrset_cache_update(); when that call
+ * reports that the reference was updated (return value 1 or 2),
+ * rep->rrsets[i] is pointed at the key now held in rep->ref[i].
+ * @param env: module environment with caches.
+ * @param rep: contains list of rrsets to store.
+ * @param now: current time.
+ * @param leeway: during prefetch how much leeway to update TTLs.
+ * This makes rrsets (other than type NS) timeout sooner so they get
+ * updated with a new full TTL.
+ * Type NS does not get this, because it must not be refreshed from the
+ * child domain, but keep counting down properly.
+ * @param pside: if from parentside discovered NS, so that its NS is okay
+ * in a prefetch situation to be updated (without becoming sticky).
+ * @param qrep: update rrsets here if cache is better
+ * @param region: for qrep allocs. When NULL, qrep is left untouched.
+ */
+static void
+store_rrsets(struct module_env* env, struct reply_info* rep, time_t now,
+ time_t leeway, int pside, struct reply_info* qrep,
+ struct regional* region)
+{
+ size_t i;
+ /* see if rrset already exists in cache, if not insert it. */
+ for(i=0; i<rep->rrset_count; i++) {
+ rep->ref[i].key = rep->rrsets[i];
+ rep->ref[i].id = rep->rrsets[i]->id;
+ /* update ref if it was in the cache.
+ * The time passed along is plain now for a type NS rrset that is
+ * not parent-side, and now+leeway for every other rrset. */
+ switch(rrset_cache_update(env->rrset_cache, &rep->ref[i],
+ env->alloc, now + ((ntohs(rep->ref[i].key->rk.type)==
+ LDNS_RR_TYPE_NS && !pside)?0:leeway))) {
+ case 0: /* ref unchanged, item inserted */
+ break;
+ case 2: /* ref updated, cache is superior */
+ if(region) {
+ struct ub_packed_rrset_key* ck;
+ lock_rw_rdlock(&rep->ref[i].key->entry.lock);
+ /* if deleted rrset, do not copy it
+ * (an id of 0 is what is treated as deleted here) */
+ if(rep->ref[i].key->id == 0)
+ ck = NULL;
+ else ck = packed_rrset_copy_region(
+ rep->ref[i].key, region, now);
+ lock_rw_unlock(&rep->ref[i].key->entry.lock);
+ if(ck) {
+ /* use cached copy if memory allows;
+ * on a failed copy qrep keeps its own rrset */
+ qrep->rrsets[i] = ck;
+ }
+ }
+ /* no break: also copy key item */
+ case 1: /* ref updated, item inserted */
+ rep->rrsets[i] = rep->ref[i].key;
+ }
+ }
+}
+
+/**
+ * Store a reply and its rrsets in the caches.
+ * The rrsets of rep are always offered to the rrset cache. The message itself
+ * is inserted into env->msg_cache unless its TTL is 0 or the cache entry
+ * cannot be allocated; in both of those cases rep is freed here.
+ * @param env: module environment with the caches and the current time.
+ * @param qinfo: query the reply answers; handed to query_info_entrysetup().
+ * @param hash: hash value for qinfo, used for the message cache insert.
+ * @param rep: reply to store. Not usable by the caller afterwards: it is
+ *	either placed in the message cache or freed.
+ * @param leeway: passed on to store_rrsets().
+ * @param pside: passed on to store_rrsets().
+ * @param qrep: passed on to store_rrsets().
+ * @param region: passed on to store_rrsets().
+ */
+void
+dns_cache_store_msg(struct module_env* env, struct query_info* qinfo,
+	hashvalue_t hash, struct reply_info* rep, time_t leeway, int pside,
+	struct reply_info* qrep, struct regional* region)
+{
+	struct msgreply_entry* e;
+	/* sampled before reply_info_set_ttls() is applied to rep below */
+	time_t ttl = rep->ttl;
+	size_t i;
+
+	/* store RRsets */
+	for(i=0; i<rep->rrset_count; i++) {
+		rep->ref[i].key = rep->rrsets[i];
+		rep->ref[i].id = rep->rrsets[i]->id;
+	}
+
+	/* there was a reply_info_sortref(rep) here but it seems to be
+	 * unnecessary, because the cache gets locked per rrset. */
+	reply_info_set_ttls(rep, *env->now);
+	store_rrsets(env, rep, *env->now, leeway, pside, qrep, region);
+	if(ttl == 0) {
+		/* we do not store the message, but we did store the RRs,
+		 * which could be useful for delegation information */
+		verbose(VERB_ALGO, "TTL 0: dropped msg from cache");
+		free(rep);
+		return;
+	}
+
+	/* store msg in the cache */
+	reply_info_sortref(rep);
+	if(!(e = query_info_entrysetup(qinfo, rep, hash))) {
+		log_err("store_msg: malloc failed");
+		/* rep was not attached to a cache entry, so nothing else
+		 * will release it; free it the same way the TTL 0 path
+		 * does instead of leaking it */
+		free(rep);
+		return;
+	}
+	slabhash_insert(env->msg_cache, hash, &e->entry, rep, env->alloc);
+}
+
+/**
+ * Find the closest enclosing rrset of the given type in the rrset cache.
+ * The name is shortened one label at a time from the front until
+ * rrset_cache_lookup() returns an entry or the name is used up.
+ * @param env: module environment with the rrset cache.
+ * @param qname: name to start at (uncompressed wire format labels).
+ * @param qnamelen: length of qname.
+ * @param qclass: class to look for.
+ * @param now: current time, passed to the cache lookup.
+ * @param searchtype: rrset type to look for (NS or DNAME).
+ * @param stripfront: if true, the first label is removed before the first
+ *	lookup, so that only strict parents of qname can match.
+ * @return the rrset as returned (locked) by the cache lookup, or NULL.
+ */
+static struct ub_packed_rrset_key*
+find_closest_of_type(struct module_env* env, uint8_t* qname, size_t qnamelen,
+	uint16_t qclass, time_t now, uint16_t searchtype, int stripfront)
+{
+	struct ub_packed_rrset_key* found;
+	size_t skip = 0;
+
+	if(stripfront) {
+		/* strip off so that DNAMEs have strict subdomain match */
+		skip = (size_t)*qname + 1;
+		qname += skip;
+		qnamelen -= skip;
+	}
+
+	/* each round drops the front label: its length byte plus content */
+	for(; qnamelen > 0; qname += skip, qnamelen -= skip) {
+		found = rrset_cache_lookup(env->rrset_cache, qname,
+			qnamelen, searchtype, qclass, 0, now, 0);
+		if(found)
+			return found;
+		skip = (size_t)*qname + 1;
+	}
+	return NULL;
+}
+
+/**
+ * Append a copy of an address rrset to the additional section of msg.
+ * The copy is made in region; the section counters only advance when the
+ * copy succeeded.
+ * @param rrset: rrset to copy.
+ * @param region: where the copy is allocated.
+ * @param msg: message whose reply gets the rrset appended.
+ * @param now: current time, passed to packed_rrset_copy_region().
+ */
+static void
+addr_to_additional(struct ub_packed_rrset_key* rrset, struct regional* region,
+	struct dns_msg* msg, time_t now)
+{
+	struct reply_info* rep = msg->rep;
+	struct ub_packed_rrset_key* copy =
+		packed_rrset_copy_region(rrset, region, now);
+
+	/* the slot is written even when the copy failed (NULL) */
+	rep->rrsets[rep->rrset_count] = copy;
+	if(!copy)
+		return;
+	rep->ar_numrrsets++;
+	rep->rrset_count++;
+}
+
+/**
+ * Look up a message in the message cache.
+ * @param env: module environment with the message cache.
+ * @param qname: query name.
+ * @param qnamelen: length of qname.
+ * @param qtype: query type.
+ * @param qclass: query class.
+ * @param now: current time; an entry whose ttl is before now is unlocked
+ *	again and treated as not found.
+ * @param wr: passed to slabhash_lookup() as its last argument.
+ * @return the entry key, still locked by the lookup, or NULL.
+ */
+static struct msgreply_entry*
+msg_cache_lookup(struct module_env* env, uint8_t* qname, size_t qnamelen,
+	uint16_t qtype, uint16_t qclass, time_t now, int wr)
+{
+	struct query_info key;
+	struct lruhash_entry* entry;
+	struct reply_info* rep;
+
+	key.qname = qname;
+	key.qname_len = qnamelen;
+	key.qtype = qtype;
+	key.qclass = qclass;
+
+	entry = slabhash_lookup(env->msg_cache, query_info_hash(&key),
+		&key, wr);
+	if(!entry)
+		return NULL;
+
+	rep = (struct reply_info*)entry->data;
+	if(now > rep->ttl) {
+		/* expired: release the lock taken by the lookup */
+		lock_rw_unlock(&entry->lock);
+		return NULL;
+	}
+	return (struct msgreply_entry*)entry->key;
+}
+
+/** find and add A and AAAA records for nameservers in delegpt.
+ * For each nameserver in dp->nslist the rrset cache is queried for A and
+ * then for AAAA. A hit is added to dp and, when msg is non-NULL, also
+ * copied into *msg via addr_to_additional(). On a miss the message cache
+ * is consulted and a found entry is passed to delegpt_add_neg_msg().
+ * Every cache entry obtained here is unlocked again before moving on.
+ * Returns 0 if delegpt_add_rrset_A/AAAA reports failure, otherwise 1. */
+static int
+find_add_addrs(struct module_env* env, uint16_t qclass,
+ struct regional* region, struct delegpt* dp, time_t now,
+ struct dns_msg** msg)
+{
+ struct delegpt_ns* ns;
+ struct msgreply_entry* neg;
+ struct ub_packed_rrset_key* akey;
+ for(ns = dp->nslist; ns; ns = ns->next) {
+ akey = rrset_cache_lookup(env->rrset_cache, ns->name,
+ ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0);
+ if(akey) {
+ if(!delegpt_add_rrset_A(dp, region, akey, 0)) {
+ lock_rw_unlock(&akey->entry.lock);
+ return 0;
+ }
+ if(msg)
+ addr_to_additional(akey, region, *msg, now);
+ lock_rw_unlock(&akey->entry.lock);
+ } else {
+ neg = msg_cache_lookup(env, ns->name, ns->namelen,
+ LDNS_RR_TYPE_A, qclass, now, 0);
+ if(neg) {
+ delegpt_add_neg_msg(dp, neg);
+ lock_rw_unlock(&neg->entry.lock);
+ }
+ }
+ akey = rrset_cache_lookup(env->rrset_cache, ns->name,
+ ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
+ if(akey) {
+ if(!delegpt_add_rrset_AAAA(dp, region, akey, 0)) {
+ lock_rw_unlock(&akey->entry.lock);
+ return 0;
+ }
+ if(msg)
+ addr_to_additional(akey, region, *msg, now);
+ lock_rw_unlock(&akey->entry.lock);
+ } else {
+ neg = msg_cache_lookup(env, ns->name, ns->namelen,
+ LDNS_RR_TYPE_AAAA, qclass, now, 0);
+ if(neg) {
+ delegpt_add_neg_msg(dp, neg);
+ lock_rw_unlock(&neg->entry.lock);
+ }
+ }
+ }
+ return 1;
+}
+
+/** find and add A and AAAA records for missing nameservers in delegpt.
+ * Walks dp->nslist and, using *env->now as the current time, queries the
+ * rrset cache for A and then AAAA of every nameserver name. A hit is added
+ * to dp with that nameserver's lame flag and logged at VERB_ALGO; on a
+ * miss the message cache is consulted and a found entry is passed to
+ * delegpt_add_neg_msg(). Cache entries are unlocked again after use.
+ * Returns 0 if delegpt_add_rrset_A/AAAA reports failure, otherwise 1. */
+int
+cache_fill_missing(struct module_env* env, uint16_t qclass,
+ struct regional* region, struct delegpt* dp)
+{
+ struct delegpt_ns* ns;
+ struct msgreply_entry* neg;
+ struct ub_packed_rrset_key* akey;
+ time_t now = *env->now;
+ for(ns = dp->nslist; ns; ns = ns->next) {
+ akey = rrset_cache_lookup(env->rrset_cache, ns->name,
+ ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0);
+ if(akey) {
+ if(!delegpt_add_rrset_A(dp, region, akey, ns->lame)) {
+ lock_rw_unlock(&akey->entry.lock);
+ return 0;
+ }
+ log_nametypeclass(VERB_ALGO, "found in cache",
+ ns->name, LDNS_RR_TYPE_A, qclass);
+ lock_rw_unlock(&akey->entry.lock);
+ } else {
+ neg = msg_cache_lookup(env, ns->name, ns->namelen,
+ LDNS_RR_TYPE_A, qclass, now, 0);
+ if(neg) {
+ delegpt_add_neg_msg(dp, neg);
+ lock_rw_unlock(&neg->entry.lock);
+ }
+ }
+ akey = rrset_cache_lookup(env->rrset_cache, ns->name,
+ ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
+ if(akey) {
+ if(!delegpt_add_rrset_AAAA(dp, region, akey, ns->lame)) {
+ lock_rw_unlock(&akey->entry.lock);
+ return 0;
+ }
+ log_nametypeclass(VERB_ALGO, "found in cache",
+ ns->name, LDNS_RR_TYPE_AAAA, qclass);
+ lock_rw_unlock(&akey->entry.lock);
+ } else {
+ neg = msg_cache_lookup(env, ns->name, ns->namelen,
+ LDNS_RR_TYPE_AAAA, qclass, now, 0);
+ if(neg) {
+ delegpt_add_neg_msg(dp, neg);
+ lock_rw_unlock(&neg->entry.lock);
+ }
+ }
+ }
+ return 1;
+}
+
+/** find and add DS or NSEC to delegation msg.
+ * Looks up a DS rrset at dp->name in the rrset cache; if there is none, an
+ * NSEC rrset at the same name is tried, and dropped again when
+ * nsec_has_type() reports type DS for it. A remaining rrset is copied into
+ * region and appended to msg->rep, counted in ns_numrrsets. Nothing is
+ * added when no rrset is found or the copy fails. */
+static void
+find_add_ds(struct module_env* env, struct regional* region,
+ struct dns_msg* msg, struct delegpt* dp, time_t now)
+{
+ /* Lookup the DS or NSEC at the delegation point. */
+ struct ub_packed_rrset_key* rrset = rrset_cache_lookup(
+ env->rrset_cache, dp->name, dp->namelen, LDNS_RR_TYPE_DS,
+ msg->qinfo.qclass, 0, now, 0);
+ if(!rrset) {
+ /* NOTE: this won't work for alternate NSEC schemes
+ * (opt-in, NSEC3) */
+ rrset = rrset_cache_lookup(env->rrset_cache, dp->name,
+ dp->namelen, LDNS_RR_TYPE_NSEC, msg->qinfo.qclass,
+ 0, now, 0);
+ /* Note: the PACKED_RRSET_NSEC_AT_APEX flag is not used.
+ * since this is a referral, we need the NSEC at the parent
+ * side of the zone cut, not the NSEC at apex side. */
+ if(rrset && nsec_has_type(rrset, LDNS_RR_TYPE_DS)) {
+ lock_rw_unlock(&rrset->entry.lock);
+ rrset = NULL; /* discard wrong NSEC */
+ }
+ }
+ if(rrset) {
+ /* add it to auth section. This is the second rrset.
+ * The counters only advance when the copy succeeded. */
+ if((msg->rep->rrsets[msg->rep->rrset_count] =
+ packed_rrset_copy_region(rrset, region, now))) {
+ msg->rep->ns_numrrsets++;
+ msg->rep->rrset_count++;
+ }
+ lock_rw_unlock(&rrset->entry.lock); /* lock came from the cache lookup */
+ }
+}
+
+/**
+ * Create an empty dns message in a region.
+ * The query name is copied into region. The reply_info is allocated zeroed
+ * and then gets flags BIT_QR and qdcount 1; its rrsets array has room for
+ * capacity keys and is left uninitialised.
+ * @param qname: query name, copied.
+ * @param qnamelen: length of qname.
+ * @param qtype: query type.
+ * @param qclass: query class.
+ * @param region: where all allocations are made.
+ * @param capacity: number of rrset key pointers to reserve.
+ * @return the new message, or NULL on allocation failure or when capacity
+ *	is too large for the array size to be represented.
+ */
+struct dns_msg*
+dns_msg_create(uint8_t* qname, size_t qnamelen, uint16_t qtype,
+	uint16_t qclass, struct regional* region, size_t capacity)
+{
+	struct dns_msg* msg;
+
+	/* integer overflow protection: capacity*sizeof(pointer) below must
+	 * not wrap around, or a too-small array would be handed out */
+	if(capacity > ((size_t)-1)/sizeof(struct ub_packed_rrset_key*))
+		return NULL;
+	msg = (struct dns_msg*)regional_alloc(region,
+		sizeof(struct dns_msg));
+	if(!msg)
+		return NULL;
+	msg->qinfo.qname = regional_alloc_init(region, qname, qnamelen);
+	if(!msg->qinfo.qname)
+		return NULL;
+	msg->qinfo.qname_len = qnamelen;
+	msg->qinfo.qtype = qtype;
+	msg->qinfo.qclass = qclass;
+	/* non-packed reply_info, because it needs to grow the array */
+	msg->rep = (struct reply_info*)regional_alloc_zero(region,
+		sizeof(struct reply_info)-sizeof(struct rrset_ref));
+	if(!msg->rep)
+		return NULL;
+	msg->rep->flags = BIT_QR; /* with QR, no AA */
+	msg->rep->qdcount = 1;
+	msg->rep->rrsets = (struct ub_packed_rrset_key**)
+		regional_alloc(region,
+		capacity*sizeof(struct ub_packed_rrset_key*));
+	if(!msg->rep->rrsets)
+		return NULL;
+	return msg;
+}
+
+/**
+ * Append a copy of rrset to the authority section of msg.
+ * The copy is made in region. The rrset count and the authority count are
+ * only advanced when the copy succeeded, so a failed call leaves the
+ * message counters unchanged and no NULL entry inside the counted range.
+ * @param msg: message to add to; its rrsets array must have a free slot.
+ * @param region: where the copy is allocated.
+ * @param rrset: rrset to copy.
+ * @param now: current time, passed to packed_rrset_copy_region().
+ * @return 1 on success, 0 if the copy failed.
+ */
+int
+dns_msg_authadd(struct dns_msg* msg, struct regional* region,
+	struct ub_packed_rrset_key* rrset, time_t now)
+{
+	struct ub_packed_rrset_key* copy =
+		packed_rrset_copy_region(rrset, region, now);
+	if(!copy)
+		return 0;
+	msg->rep->rrsets[msg->rep->rrset_count++] = copy;
+	msg->rep->ns_numrrsets++;
+	return 1;
+}
+
+struct delegpt*
+dns_cache_find_delegation(struct module_env* env, uint8_t* qname,
+ size_t qnamelen, uint16_t qtype, uint16_t qclass,
+ struct regional* region, struct dns_msg** msg, time_t now)
+{
+ /* try to find closest NS rrset.
+ * From it a delegation point is built in region and, when msg is
+ * non-NULL, a referral message as well. The NS rrset comes back
+ * locked from find_closest_of_type(); that lock is released on every
+ * exit path below and before any further cache lookup is made. */
+ struct ub_packed_rrset_key* nskey;
+ struct packed_rrset_data* nsdata;
+ struct delegpt* dp;
+
+ nskey = find_closest_of_type(env, qname, qnamelen, qclass, now,
+ LDNS_RR_TYPE_NS, 0);
+ if(!nskey) /* hope the caller has hints to prime or something */
+ return NULL;
+ nsdata = (struct packed_rrset_data*)nskey->entry.data;
+ /* got the NS key, create delegation point */
+ dp = delegpt_create(region);
+ if(!dp || !delegpt_set_name(dp, region, nskey->rk.dname)) {
+ lock_rw_unlock(&nskey->entry.lock);
+ log_err("find_delegation: out of memory");
+ return NULL;
+ }
+ /* create referral message */
+ if(msg) {
+ /* allocate the array to as much as we could need:
+ * NS rrset + DS/NSEC rrset +
+ * A rrset for every NS RR
+ * AAAA rrset for every NS RR
+ */
+ *msg = dns_msg_create(qname, qnamelen, qtype, qclass, region,
+ 2 + nsdata->count*2);
+ if(!*msg || !dns_msg_authadd(*msg, region, nskey, now)) {
+ lock_rw_unlock(&nskey->entry.lock);
+ log_err("find_delegation: out of memory");
+ return NULL;
+ }
+ }
+ if(!delegpt_rrset_add_ns(dp, region, nskey, 0))
+ log_err("find_delegation: addns out of memory"); /* logged only, continues */
+ lock_rw_unlock(&nskey->entry.lock); /* first unlock before next lookup*/
+ /* find and add DS/NSEC (if any) */
+ if(msg)
+ find_add_ds(env, region, *msg, dp, now);
+ /* find and add A entries */
+ if(!find_add_addrs(env, qclass, region, dp, now, msg))
+ log_err("find_delegation: addrs out of memory");
+ return dp; /* returned even when the address lookup reported failure */
+}
+
+/**
+ * Allocate a dns_msg in region from a query_info, with room for rrsets.
+ * The query_info is copied and its qname duplicated into region. The
+ * reply_info is allocated with regional_alloc() (not zeroed), so callers
+ * have to fill in its fields; only the rrsets array pointer is set here.
+ * @param region: where all allocations are made.
+ * @param q: query info to copy.
+ * @param num: number of rrset key pointers to reserve.
+ * @return the new message, or NULL on allocation failure or when num is
+ *	too large for the array size to be represented.
+ */
+static struct dns_msg*
+gen_dns_msg(struct regional* region, struct query_info* q, size_t num)
+{
+	struct dns_msg* msg;
+
+	/* integer overflow protection: num*sizeof(pointer) below must not
+	 * wrap around, or a too-small array would be handed out */
+	if(num > ((size_t)-1)/sizeof(struct ub_packed_rrset_key*))
+		return NULL;
+	msg = (struct dns_msg*)regional_alloc(region,
+		sizeof(struct dns_msg));
+	if(!msg)
+		return NULL;
+	memcpy(&msg->qinfo, q, sizeof(struct query_info));
+	msg->qinfo.qname = regional_alloc_init(region, q->qname, q->qname_len);
+	if(!msg->qinfo.qname)
+		return NULL;
+	/* allocate replyinfo struct and rrset key array separately */
+	msg->rep = (struct reply_info*)regional_alloc(region,
+		sizeof(struct reply_info) - sizeof(struct rrset_ref));
+	if(!msg->rep)
+		return NULL;
+	msg->rep->rrsets = (struct ub_packed_rrset_key**)
+		regional_alloc(region,
+		num * sizeof(struct ub_packed_rrset_key*));
+	if(!msg->rep->rrsets)
+		return NULL;
+	return msg;
+}
+
+/** generate dns_msg from cached message.
+ * Returns NULL when the message has expired (now > r->ttl), on allocation
+ * failure, when rrset_array_lock() fails, when the reply starts with a
+ * CNAME or DNAME and reply_check_cname_chain() rejects it, or when a
+ * message marked secure no longer has all rrsets secure. The rrsets of r
+ * are held via rrset_array_lock() while they are copied into region and
+ * are released again on every path after that lock succeeded. TTL values
+ * in the new message are made relative to now. */
+static struct dns_msg*
+tomsg(struct module_env* env, struct query_info* q, struct reply_info* r,
+ struct regional* region, time_t now, struct regional* scratch)
+{
+ struct dns_msg* msg;
+ size_t i;
+ if(now > r->ttl)
+ return NULL;
+ msg = gen_dns_msg(region, q, r->rrset_count);
+ if(!msg)
+ return NULL;
+ msg->rep->flags = r->flags;
+ msg->rep->qdcount = r->qdcount;
+ msg->rep->ttl = r->ttl - now;
+ if(r->prefetch_ttl > now)
+ msg->rep->prefetch_ttl = r->prefetch_ttl - now;
+ else msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl); /* prefetch time already passed: recompute from remaining ttl */
+ msg->rep->security = r->security;
+ msg->rep->an_numrrsets = r->an_numrrsets;
+ msg->rep->ns_numrrsets = r->ns_numrrsets;
+ msg->rep->ar_numrrsets = r->ar_numrrsets;
+ msg->rep->rrset_count = r->rrset_count;
+ msg->rep->authoritative = r->authoritative;
+ if(!rrset_array_lock(r->ref, r->rrset_count, now))
+ return NULL; /* no unlock here: the lock call itself failed */
+ if(r->an_numrrsets > 0 && (r->rrsets[0]->rk.type == htons(
+ LDNS_RR_TYPE_CNAME) || r->rrsets[0]->rk.type == htons(
+ LDNS_RR_TYPE_DNAME)) && !reply_check_cname_chain(r)) {
+ /* cname chain is now invalid, reconstruct msg */
+ rrset_array_unlock(r->ref, r->rrset_count);
+ return NULL;
+ }
+ if(r->security == sec_status_secure && !reply_all_rrsets_secure(r)) {
+ /* message rrsets have changed status, revalidate */
+ rrset_array_unlock(r->ref, r->rrset_count);
+ return NULL;
+ }
+ for(i=0; i<msg->rep->rrset_count; i++) {
+ msg->rep->rrsets[i] = packed_rrset_copy_region(r->rrsets[i],
+ region, now);
+ if(!msg->rep->rrsets[i]) {
+ rrset_array_unlock(r->ref, r->rrset_count);
+ return NULL;
+ }
+ }
+ rrset_array_unlock_touch(env->rrset_cache, scratch, r->ref,
+ r->rrset_count); /* success path uses the _touch variant of unlock */
+ return msg;
+}
+
+/**
+ * Build a reply that consists of a single cached rrset.
+ * The rrset is copied into region as the only answer rrset; the reply gets
+ * flags BIT_QR, security sec_status_unchecked and a ttl relative to now.
+ * @param rrset: cached rrset to answer with; the caller holds its lock.
+ * @param region: where the message is allocated.
+ * @param now: current time.
+ * @param q: query info for the message.
+ * @return the message, or NULL if the rrset has expired or on allocation
+ *	failure.
+ */
+static struct dns_msg*
+rrset_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
+	time_t now, struct query_info* q)
+{
+	struct packed_rrset_data* data =
+		(struct packed_rrset_data*)rrset->entry.data;
+	struct dns_msg* msg;
+	struct reply_info* rep;
+
+	if(now > data->ttl)
+		return NULL;
+	/* only the CNAME (or other) RRset */
+	msg = gen_dns_msg(region, q, 1);
+	if(!msg)
+		return NULL;
+	rep = msg->rep;
+
+	/* header: reply, no AA, no error; a reply stored in cache can't be
+	 * authoritative */
+	rep->flags = BIT_QR;
+	rep->authoritative = 0;
+	rep->qdcount = 1;
+
+	/* lifetime and validation state */
+	rep->ttl = data->ttl - now;
+	rep->prefetch_ttl = PREFETCH_TTL_CALC(rep->ttl);
+	rep->security = sec_status_unchecked;
+
+	/* one rrset, in the answer section */
+	rep->an_numrrsets = 1;
+	rep->ns_numrrsets = 0;
+	rep->ar_numrrsets = 0;
+	rep->rrset_count = 1;
+	rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now);
+	return rep->rrsets[0] ? msg : NULL;
+}
+
+/** synthesize DNAME+CNAME response from cached DNAME item.
+ * The DNAME rrset is copied as the first answer rrset and a CNAME rrset
+ * for the query name is built in region as the second one. If the
+ * substituted name would exceed LDNS_MAX_DOMAINLEN the message is returned
+ * with only the DNAME and LDNS_RCODE_YXDOMAIN or-ed into the flags.
+ * Returns NULL if the DNAME has expired, is not sec_status_secure, when
+ * get_cname_target() yields no target, or when an allocation fails. */
+static struct dns_msg*
+synth_dname_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
+ time_t now, struct query_info* q)
+{
+ struct dns_msg* msg;
+ struct ub_packed_rrset_key* ck;
+ struct packed_rrset_data* newd, *d = (struct packed_rrset_data*)
+ rrset->entry.data;
+ uint8_t* newname, *dtarg = NULL;
+ size_t newlen, dtarglen;
+ if(now > d->ttl)
+ return NULL;
+ /* only allow validated (with DNSSEC) DNAMEs used from cache
+ * for insecure DNAMEs, query again. */
+ if(d->security != sec_status_secure)
+ return NULL;
+ msg = gen_dns_msg(region, q, 2); /* DNAME + CNAME RRset */
+ if(!msg)
+ return NULL;
+ msg->rep->flags = BIT_QR; /* reply, no AA, no error */
+ msg->rep->authoritative = 0; /* reply stored in cache can't be authoritative */
+ msg->rep->qdcount = 1;
+ msg->rep->ttl = d->ttl - now;
+ msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
+ msg->rep->security = sec_status_unchecked;
+ msg->rep->an_numrrsets = 1;
+ msg->rep->ns_numrrsets = 0;
+ msg->rep->ar_numrrsets = 0;
+ msg->rep->rrset_count = 1; /* only the DNAME so far; raised at the end */
+ msg->rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now);
+ if(!msg->rep->rrsets[0]) /* copy DNAME */
+ return NULL;
+ /* synth CNAME rrset */
+ get_cname_target(rrset, &dtarg, &dtarglen);
+ if(!dtarg)
+ return NULL;
+ newlen = q->qname_len + dtarglen - rrset->rk.dname_len; /* qname with the DNAME owner suffix swapped for the target */
+ if(newlen > LDNS_MAX_DOMAINLEN) {
+ msg->rep->flags |= LDNS_RCODE_YXDOMAIN;
+ return msg;
+ }
+ newname = (uint8_t*)regional_alloc(region, newlen);
+ if(!newname)
+ return NULL;
+ /* new name is concatenation of qname front (without DNAME owner)
+ * and DNAME target name */
+ memcpy(newname, q->qname, q->qname_len-rrset->rk.dname_len);
+ memmove(newname+(q->qname_len-rrset->rk.dname_len), dtarg, dtarglen);
+ /* create rest of CNAME rrset */
+ ck = (struct ub_packed_rrset_key*)regional_alloc(region,
+ sizeof(struct ub_packed_rrset_key));
+ if(!ck)
+ return NULL;
+ memset(&ck->entry, 0, sizeof(ck->entry));
+ msg->rep->rrsets[1] = ck;
+ ck->entry.key = ck;
+ ck->rk.type = htons(LDNS_RR_TYPE_CNAME);
+ ck->rk.rrset_class = rrset->rk.rrset_class;
+ ck->rk.flags = 0;
+ ck->rk.dname = regional_alloc_init(region, q->qname, q->qname_len); /* CNAME owner is the query name */
+ if(!ck->rk.dname)
+ return NULL;
+ ck->rk.dname_len = q->qname_len;
+ ck->entry.hash = rrset_key_hash(&ck->rk);
+ newd = (struct packed_rrset_data*)regional_alloc_zero(region,
+ sizeof(struct packed_rrset_data) + sizeof(size_t) +
+ sizeof(uint8_t*) + sizeof(time_t) + sizeof(uint16_t)
+ + newlen); /* header, one size_t, one pointer, one time_t, then a uint16_t prefix plus the new name */
+ if(!newd)
+ return NULL;
+ ck->entry.data = newd;
+ newd->ttl = 0; /* 0 for synthesized CNAME TTL */
+ newd->count = 1;
+ newd->rrsig_count = 0;
+ newd->trust = rrset_trust_ans_noAA;
+ newd->rr_len = (size_t*)((uint8_t*)newd +
+ sizeof(struct packed_rrset_data));
+ newd->rr_len[0] = newlen + sizeof(uint16_t);
+ packed_rrset_ptr_fixup(newd); /* rr_ttl and rr_data are used right after this call */
+ newd->rr_ttl[0] = newd->ttl;
+ msg->rep->ttl = newd->ttl; /* message ttl follows the synthesized CNAME, i.e. 0 */
+ msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(newd->ttl);
+ sldns_write_uint16(newd->rr_data[0], newlen);
+ memmove(newd->rr_data[0] + sizeof(uint16_t), newname, newlen);
+ msg->rep->an_numrrsets ++;
+ msg->rep->rrset_count ++;
+ return msg;
+}
+
+struct dns_msg*
+dns_cache_lookup(struct module_env* env,
+ uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
+ struct regional* region, struct regional* scratch)
+{
+ struct lruhash_entry* e;
+ struct query_info k;
+ hashvalue_t h;
+ time_t now = *env->now;
+ struct ub_packed_rrset_key* rrset;
+
+ /* lookup first, this has both NXdomains and ANSWER responses.
+ * Sources are tried in this order, returning the first message that
+ * can be built in region: the message cache, a DNAME above the name,
+ * a CNAME at the name, a DS/DNSKEY/DLV rrset at the name, and
+ * (if configured) a secure NXDOMAIN message for a parent name.
+ * NULL is returned when none of them yields a message. */
+ k.qname = qname;
+ k.qname_len = qnamelen;
+ k.qtype = qtype;
+ k.qclass = qclass;
+ h = query_info_hash(&k);
+ e = slabhash_lookup(env->msg_cache, h, &k, 0);
+ if(e) {
+ struct msgreply_entry* key = (struct msgreply_entry*)e->key;
+ struct reply_info* data = (struct reply_info*)e->data;
+ struct dns_msg* msg = tomsg(env, &key->key, data, region, now,
+ scratch);
+ if(msg) {
+ lock_rw_unlock(&e->lock);
+ return msg;
+ }
+ /* could be msg==NULL; due to TTL or not all rrsets available */
+ lock_rw_unlock(&e->lock);
+ }
+
+ /* see if a DNAME exists. Checked for first, to enforce that DNAMEs
+ * are more important, the CNAME is resynthesized and thus
+ * consistent with the DNAME */
+ if( (rrset=find_closest_of_type(env, qname, qnamelen, qclass, now,
+ LDNS_RR_TYPE_DNAME, 1))) {
+ /* synthesize a DNAME+CNAME message based on this */
+ struct dns_msg* msg = synth_dname_msg(rrset, region, now, &k);
+ if(msg) {
+ lock_rw_unlock(&rrset->entry.lock);
+ return msg;
+ }
+ lock_rw_unlock(&rrset->entry.lock);
+ }
+
+ /* see if we have CNAME for this domain,
+ * but not for DS records (which are part of the parent) */
+ if( qtype != LDNS_RR_TYPE_DS &&
+ (rrset=rrset_cache_lookup(env->rrset_cache, qname, qnamelen,
+ LDNS_RR_TYPE_CNAME, qclass, 0, now, 0))) {
+ struct dns_msg* msg = rrset_msg(rrset, region, now, &k);
+ if(msg) {
+ lock_rw_unlock(&rrset->entry.lock);
+ return msg;
+ }
+ lock_rw_unlock(&rrset->entry.lock);
+ }
+
+ /* construct DS, DNSKEY, DLV messages from rrset cache. */
+ if((qtype == LDNS_RR_TYPE_DS || qtype == LDNS_RR_TYPE_DNSKEY ||
+ qtype == LDNS_RR_TYPE_DLV) &&
+ (rrset=rrset_cache_lookup(env->rrset_cache, qname, qnamelen,
+ qtype, qclass, 0, now, 0))) {
+ /* if the rrset is from the additional section, and the
+ * signatures have fallen off, then do not synthesize a msg
+ * instead, allow a full query for signed results to happen.
+ * Forego all rrset data from additional section, because
+ * some signatures may not be present and cause validation
+ * failure.
+ * Authority section trust is accepted for type DS only.
+ */
+ struct packed_rrset_data *d = (struct packed_rrset_data*)
+ rrset->entry.data;
+ if(d->trust != rrset_trust_add_noAA &&
+ d->trust != rrset_trust_add_AA &&
+ (qtype == LDNS_RR_TYPE_DS ||
+ (d->trust != rrset_trust_auth_noAA
+ && d->trust != rrset_trust_auth_AA) )) {
+ struct dns_msg* msg = rrset_msg(rrset, region, now, &k);
+ if(msg) {
+ lock_rw_unlock(&rrset->entry.lock);
+ return msg;
+ }
+ }
+ lock_rw_unlock(&rrset->entry.lock);
+ }
+
+ /* stop downwards cache search on NXDOMAIN.
+ * Empty nonterminals are NOERROR, so an NXDOMAIN for foo
+ * means bla.foo also does not exist. The DNSSEC proofs are
+ * the same. We search upwards for NXDOMAINs.
+ * k.qname is shortened in place here; qname itself is not changed. */
+ if(env->cfg->harden_below_nxdomain)
+ while(!dname_is_root(k.qname)) {
+ dname_remove_label(&k.qname, &k.qname_len);
+ h = query_info_hash(&k);
+ e = slabhash_lookup(env->msg_cache, h, &k, 0);
+ if(e) {
+ struct reply_info* data = (struct reply_info*)e->data;
+ struct dns_msg* msg;
+ if(FLAGS_GET_RCODE(data->flags) == LDNS_RCODE_NXDOMAIN
+ && data->security == sec_status_secure
+ && (msg=tomsg(env, &k, data, region, now, scratch))){
+ lock_rw_unlock(&e->lock);
+ msg->qinfo.qname=qname; /* answer carries the original query name (caller's pointer, not a copy) */
+ msg->qinfo.qname_len=qnamelen;
+ /* check that DNSSEC really works out */
+ msg->rep->security = sec_status_unchecked;
+ return msg;
+ }
+ lock_rw_unlock(&e->lock);
+ }
+ }
+
+ return NULL;
+}
+
+int
+dns_cache_store(struct module_env* env, struct query_info* msgqinf,
+ struct reply_info* msgrep, int is_referral, time_t leeway, int pside,
+ struct regional* region)
+{
+ struct reply_info* rep = NULL;
+ /* alloc, malloc properly (not in region, like msg is).
+ * The caches receive this copy, never msgrep itself. For a referral
+ * only the rrsets are stored; otherwise the message is stored too,
+ * through dns_cache_store_msg(). Returns 0 when one of the copies
+ * made here cannot be allocated, 1 otherwise. */
+ rep = reply_info_copy(msgrep, env->alloc, NULL);
+ if(!rep)
+ return 0;
+ /* ttl must be relative ;i.e. 0..86400 not time(0)+86400.
+ * the env->now is added to message and RRsets in this routine. */
+ /* the leeway is used to invalidate other rrsets earlier */
+
+ if(is_referral) {
+ /* store rrsets */
+ struct rrset_ref ref;
+ size_t i;
+ for(i=0; i<rep->rrset_count; i++) {
+ packed_rrset_ttl_add((struct packed_rrset_data*)
+ rep->rrsets[i]->entry.data, *env->now);
+ ref.key = rep->rrsets[i];
+ ref.id = rep->rrsets[i]->id;
+ /*ignore ret: it was in the cache, ref updated */
+ /* no leeway for typeNS */
+ (void)rrset_cache_update(env->rrset_cache, &ref,
+ env->alloc, *env->now +
+ ((ntohs(ref.key->rk.type)==LDNS_RR_TYPE_NS
+ && !pside) ? 0:leeway));
+ }
+ free(rep); /* only the reply_info copy is freed; its rrsets were handed to rrset_cache_update() */
+ return 1;
+ } else {
+ /* store msg, and rrsets */
+ struct query_info qinf;
+ hashvalue_t h;
+
+ qinf = *msgqinf;
+ qinf.qname = memdup(msgqinf->qname, msgqinf->qname_len); /* private copy of the name for the cache entry */
+ if(!qinf.qname) {
+ reply_info_parsedelete(rep, env->alloc);
+ return 0;
+ }
+ /* fixup flags to be sensible for a reply based on the cache */
+ /* this module means that RA is available. It is an answer QR.
+ * Not AA from cache. Not CD in cache (depends on client bit). */
+ rep->flags |= (BIT_RA | BIT_QR);
+ rep->flags &= ~(BIT_AA | BIT_CD);
+ h = query_info_hash(&qinf);
+ dns_cache_store_msg(env, &qinf, h, rep, leeway, pside, msgrep,
+ region); /* msgrep is passed as qrep, the reply that may get cached rrsets */
+ /* qname is used inside query_info_entrysetup, and set to
+ * NULL. If it has not been used, free it. free(0) is safe. */
+ free(qinf.qname);
+ }
+ return 1;
+}
+
+/**
+ * Adjust the prefetch ttl of a message in the message cache.
+ * The entry is looked up with msg_cache_lookup() (last argument 1) using
+ * *env->now as the current time, and is unlocked again before returning.
+ * @param env: module environment with the message cache and current time.
+ * @param qinfo: query that identifies the cached message.
+ * @param adjust: amount added to the entry's prefetch_ttl.
+ * @return 1 if an entry with reply data was found and adjusted, else 0.
+ */
+int
+dns_cache_prefetch_adjust(struct module_env* env, struct query_info* qinfo,
+	time_t adjust)
+{
+	struct reply_info* rep;
+	struct msgreply_entry* hit = msg_cache_lookup(env, qinfo->qname,
+		qinfo->qname_len, qinfo->qtype, qinfo->qclass, *env->now, 1);
+
+	if(!hit)
+		return 0;
+	rep = (struct reply_info*)hit->entry.data;
+	if(rep)
+		rep->prefetch_ttl += adjust;
+	lock_rw_unlock(&hit->entry.lock);
+	return rep ? 1 : 0;
+}
diff --git a/external/unbound/services/cache/dns.h b/external/unbound/services/cache/dns.h
new file mode 100644
index 000000000..05a3e6296
--- /dev/null
+++ b/external/unbound/services/cache/dns.h
@@ -0,0 +1,194 @@
+/*
+ * services/cache/dns.h - Cache services for DNS using msg and rrset caches.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains the DNS cache.
+ */
+
+#ifndef SERVICES_CACHE_DNS_H
+#define SERVICES_CACHE_DNS_H
+#include "util/storage/lruhash.h"
+#include "util/data/msgreply.h"
+struct module_env;
+struct query_info;
+struct reply_info;
+struct regional;
+struct delegpt;
+
+/**
+ * Region allocated message reply
+ */
+struct dns_msg {
+ /** query info, stored by value inside this structure */
+ struct query_info qinfo;
+ /** reply info - ptr to packed repinfo structure (not embedded) */
+ struct reply_info *rep;
+};
+
+/**
+ * Allocate a dns_msg with malloc/alloc structure and store in dns cache.
+ *
+ * @param env: environment, with alloc structure and dns cache.
+ * @param qinf: query info, the query for which answer is stored.
+ * this is allocated in a region, and will be copied to malloc area
+ * before insertion.
+ * @param rep: reply in dns_msg from dns_alloc_msg for example.
+ * this is allocated in a region, and will be copied to malloc area
+ * before insertion.
+ * @param is_referral: If true, then the given message to be stored is a
+ * referral. The cache implementation may use this as a hint.
+ * It will store only the RRsets, not the message.
+ * @param leeway: TTL value, if not 0, other rrsets are considered expired
+ * that many seconds before actual TTL expiry.
+ * @param pside: if true, information came from a server which was fetched
+ * from the parentside of the zonecut. This means that the type NS
+ * can be updated to full TTL even in prefetch situations.
+ * @param region: region to allocate better entries from cache into.
+ * (used when is_referral is false).
+ * @return 0 on alloc error (out of memory).
+ */
+int dns_cache_store(struct module_env* env, struct query_info* qinf,
+ struct reply_info* rep, int is_referral, time_t leeway, int pside,
+ struct regional* region);
+
+/**
+ * Store message in the cache. Stores in message cache and rrset cache.
+ * Both qinfo and rep should be malloced and are put in the cache.
+ * They should not be used after this call, as they are then in shared cache.
+ * Does not return errors, they are logged and only lead to less cache.
+ *
+ * @param env: module environment with the DNS cache.
+ * @param qinfo: query info
+ * @param hash: hash over qinfo.
+ * @param rep: reply info, together with qinfo makes up the message.
+ * Adjusts the reply info TTLs to absolute time.
+ * @param leeway: TTL value, if not 0, other rrsets are considered expired
+ * that many seconds before actual TTL expiry.
+ * @param pside: if true, information came from a server which was fetched
+ * from the parentside of the zonecut. This means that the type NS
+ * can be updated to full TTL even in prefetch situations.
+ * @param qrep: message that can be altered with better rrs from cache.
+ * @param region: to allocate into for qmsg.
+ */
+void dns_cache_store_msg(struct module_env* env, struct query_info* qinfo,
+ hashvalue_t hash, struct reply_info* rep, time_t leeway, int pside,
+ struct reply_info* qrep, struct regional* region);
+
+/**
+ * Find a delegation from the cache.
+ * @param env: module environment with the DNS cache.
+ * @param qname: query name.
+ * @param qnamelen: length of qname.
+ * @param qtype: query type.
+ * @param qclass: query class.
+ * @param region: where to allocate result delegation.
+ * @param msg: if not NULL, delegation message is returned here, synthesized
+ * from the cache.
+ * @param timenow: the time now, for checking if TTL on cache entries is OK.
+ * @return new delegation or NULL on error or if not found in cache.
+ */
+struct delegpt* dns_cache_find_delegation(struct module_env* env,
+ uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
+ struct regional* region, struct dns_msg** msg, time_t timenow);
+
+/**
+ * Find cached message
+ * @param env: module environment with the DNS cache.
+ * @param qname: query name.
+ * @param qnamelen: length of qname.
+ * @param qtype: query type.
+ * @param qclass: query class.
+ * @param region: where to allocate result.
+ * @param scratch: where to allocate temporary data.
+ * @return new response message (alloced in region, rrsets do not have IDs).
+ * or NULL on error or if not found in cache.
+ * TTLs are made relative to the current time.
+ */
+struct dns_msg* dns_cache_lookup(struct module_env* env,
+ uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
+ struct regional* region, struct regional* scratch);
+
+/**
+ * find and add A and AAAA records for missing nameservers in delegpt
+ * @param env: module environment with rrset cache
+ * @param qclass: which class to look in.
+ * @param region: where to store new dp info.
+ * @param dp: delegation point to fill missing entries.
+ * @return false on alloc failure.
+ */
+int cache_fill_missing(struct module_env* env, uint16_t qclass,
+ struct regional* region, struct delegpt* dp);
+
+/**
+ * Utility, create new, unpacked data structure for cache response.
+ * QR bit set, no AA. Query set as indicated. Space for number of rrsets.
+ * @param qname: query section name
+ * @param qnamelen: len of qname
+ * @param qtype: query section type
+ * @param qclass: query section class
+ * @param region: where to alloc.
+ * @param capacity: number of rrsets space to create in the array.
+ * @return new dns_msg struct or NULL on mem fail.
+ */
+struct dns_msg* dns_msg_create(uint8_t* qname, size_t qnamelen, uint16_t qtype,
+ uint16_t qclass, struct regional* region, size_t capacity);
+
+/**
+ * Add rrset to authority section in unpacked dns_msg message. Must have enough
+ * space left, does not grow the array.
+ * @param msg: msg to put it in.
+ * @param region: region to alloc in
+ * @param rrset: to add in authority section
+ * @param now: now.
+ * @return true if worked, false on fail
+ */
+int dns_msg_authadd(struct dns_msg* msg, struct regional* region,
+ struct ub_packed_rrset_key* rrset, time_t now);
+
+/**
+ * Adjust the prefetch_ttl for a cached message. This adds a value to the
+ * prefetch ttl - postponing the time when it will be prefetched for future
+ * incoming queries.
+ * @param env: module environment with caches and time.
+ * @param qinfo: query info for the query that needs adjustment.
+ * @param adjust: time in seconds to add to the prefetch_ttl.
+ * @return false if not in cache. true if added.
+ */
+int dns_cache_prefetch_adjust(struct module_env* env, struct query_info* qinfo,
+ time_t adjust);
+
+#endif /* SERVICES_CACHE_DNS_H */
diff --git a/external/unbound/services/cache/infra.c b/external/unbound/services/cache/infra.c
new file mode 100644
index 000000000..07f2103d7
--- /dev/null
+++ b/external/unbound/services/cache/infra.c
@@ -0,0 +1,569 @@
+/*
+ * services/cache/infra.c - infrastructure cache, server rtt and capabilities
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains the infrastructure cache.
+ */
+#include "config.h"
+#include "ldns/rrdef.h"
+#include "services/cache/infra.h"
+#include "util/storage/slabhash.h"
+#include "util/storage/lookup3.h"
+#include "util/data/dname.h"
+#include "util/log.h"
+#include "util/net_help.h"
+#include "util/config_file.h"
+#include "iterator/iterator.h"
+
+/** Timeout when only a single probe query per IP is allowed. */
+#define PROBE_MAXRTO 12000 /* in msec */
+
+/** number of timeouts for a type when the domain can be blocked ;
+ * even if another type has completely rtt maxed it, the different type
+ * can do this number of packets (until those all timeout too) */
+#define TIMEOUT_COUNT_MAX 3
+
+/*
+ * Size callback for the infra hash table: memory accounted to one entry,
+ * i.e. the key struct, one infra_data, the zone name bytes and the lock.
+ */
+size_t
+infra_sizefunc(void* k, void* ATTR_UNUSED(d))
+{
+	struct infra_key* hostkey = (struct infra_key*)k;
+	size_t total = sizeof(*hostkey) + sizeof(struct infra_data);
+	total += hostkey->namelen;
+	total += lock_get_mem(&hostkey->entry.lock);
+	return total;
+}
+
+/*
+ * Compare callback for infra keys. Orders by address first, then by zone
+ * name length, and finally by the zone name itself.
+ * Returns <0, 0 or >0.
+ */
+int
+infra_compfunc(void* key1, void* key2)
+{
+	struct infra_key* a = (struct infra_key*)key1;
+	struct infra_key* b = (struct infra_key*)key2;
+	int addrdiff = sockaddr_cmp(&a->addr, a->addrlen,
+		&b->addr, b->addrlen);
+	if(addrdiff != 0)
+		return addrdiff;
+	/* shorter names sort before longer names */
+	if(a->namelen < b->namelen)
+		return -1;
+	if(a->namelen > b->namelen)
+		return 1;
+	return query_dname_compare(a->zonename, b->zonename);
+}
+
+/*
+ * Key delete callback: destroys the entry lock and frees the zone name
+ * and the key itself. A NULL key is ignored.
+ */
+void
+infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
+{
+	struct infra_key* hostkey = (struct infra_key*)k;
+	if(hostkey) {
+		lock_rw_destroy(&hostkey->entry.lock);
+		free(hostkey->zonename);
+		free(hostkey);
+	}
+}
+
+/* Data delete callback: the infra_data is a single malloc block. */
+void
+infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
+{
+	free(d);
+}
+
+/*
+ * Create a new infra cache sized from the configuration.
+ * cfg must not be NULL: infra_cache_numhosts, infra_cache_slabs and
+ * host_ttl are read from it.
+ * Returns the new cache, or NULL when memory allocation fails.
+ */
+struct infra_cache*
+infra_create(struct config_file* cfg)
+{
+	struct infra_cache* infra = (struct infra_cache*)calloc(1,
+		sizeof(struct infra_cache));
+	size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
+		sizeof(struct infra_data)+INFRA_BYTES_NAME);
+	/* calloc can fail; do not dereference a NULL cache below */
+	if(!infra)
+		return NULL;
+	infra->hosts = slabhash_create(cfg->infra_cache_slabs,
+		INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc,
+		&infra_delkeyfunc, &infra_deldatafunc, NULL);
+	if(!infra->hosts) {
+		free(infra);
+		return NULL;
+	}
+	infra->host_ttl = cfg->host_ttl;
+	return infra;
+}
+
+/* Delete the infra cache and its host table. NULL is ignored. */
+void
+infra_delete(struct infra_cache* infra)
+{
+	if(infra) {
+		slabhash_delete(infra->hosts);
+		free(infra);
+	}
+}
+
+/*
+ * Apply updated config to an existing cache. host_ttl is always updated.
+ * If the configured memory size or number of slabs differs from the
+ * current table, the cache is deleted and created anew (contents lost).
+ * With infra == NULL a new cache is created.
+ * Returns the (possibly new) cache pointer; NULL if creation failed.
+ */
+struct infra_cache*
+infra_adjust(struct infra_cache* infra, struct config_file* cfg)
+{
+	size_t wanted;
+	if(!infra)
+		return infra_create(cfg);
+	infra->host_ttl = cfg->host_ttl;
+	wanted = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
+		sizeof(struct infra_data)+INFRA_BYTES_NAME);
+	if(wanted == slabhash_get_size(infra->hosts) &&
+		cfg->infra_cache_slabs == infra->hosts->size) {
+		/* dimensions unchanged, keep the existing table */
+		return infra;
+	}
+	infra_delete(infra);
+	return infra_create(cfg);
+}
+
+/** hash the family, port and address bytes of a host address */
+static hashvalue_t
+hash_addr(struct sockaddr_storage* addr, socklen_t addrlen)
+{
+	hashvalue_t hv = 0xab;
+	/* only selected fields are hashed, because some OS have changing
+	 * data inside the sockaddr */
+	if(!addr_is_ip6(addr, addrlen)) {
+		struct sockaddr_in* v4 = (struct sockaddr_in*)addr;
+		hv = hashlittle(&v4->sin_family, sizeof(v4->sin_family), hv);
+		hv = hashlittle(&v4->sin_port, sizeof(v4->sin_port), hv);
+		hv = hashlittle(&v4->sin_addr, INET_SIZE, hv);
+	} else {
+		struct sockaddr_in6* v6 = (struct sockaddr_in6*)addr;
+		hv = hashlittle(&v6->sin6_family, sizeof(v6->sin6_family), hv);
+		hv = hashlittle(&v6->sin6_port, sizeof(v6->sin6_port), hv);
+		hv = hashlittle(&v6->sin6_addr, INET6_SIZE, hv);
+	}
+	return hv;
+}
+
+/** infra key hash: the zone name hashed on top of the address hash */
+static hashvalue_t
+hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name)
+{
+	hashvalue_t seed = hash_addr(addr, addrlen);
+	return dname_query_hash(name, seed);
+}
+
+/** find the entry for addr and zone name; the host ttl is not checked
+ * here, that is left to the caller */
+struct lruhash_entry*
+infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr,
+	socklen_t addrlen, uint8_t* name, size_t namelen, int wr)
+{
+	/* temporary key on the stack, only used for the comparison;
+	 * it borrows the caller's name pointer */
+	struct infra_key probe;
+	probe.zonename = name;
+	probe.namelen = namelen;
+	probe.addrlen = addrlen;
+	memcpy(&probe.addr, addr, addrlen);
+	probe.entry.data = NULL;
+	probe.entry.key = (void*)&probe;
+	probe.entry.hash = hash_infra(addr, addrlen, name);
+	return slabhash_lookup(infra->hosts, probe.entry.hash, &probe, wr);
+}
+
+/** reset all data of entry e to initial values and give it a fresh ttl */
+static void
+data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
+	time_t timenow)
+{
+	struct infra_data* host = (struct infra_data*)e->data;
+	/* absolute expiry time */
+	host->ttl = timenow + infra->host_ttl;
+	host->probedelay = 0;
+	rtt_init(&host->rtt);
+	/* EDNS state */
+	host->edns_version = 0;
+	host->edns_lame_known = 0;
+	/* lameness flags */
+	host->isdnsseclame = 0;
+	host->rec_lame = 0;
+	host->lame_type_A = 0;
+	host->lame_other = 0;
+	/* per-type timeout counters */
+	host->timeout_A = 0;
+	host->timeout_AAAA = 0;
+	host->timeout_other = 0;
+}
+
+/**
+ * Allocate and initialise a fresh entry for a host. The entry is not
+ * inserted into the hash table; the caller does that.
+ * @param infra: infra structure with config parameters.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param name: name of zone; a copy is stored in the key.
+ * @param namelen: length of name.
+ * @param tm: time now.
+ * @return: the new entry or NULL on malloc failure.
+ */
+static struct lruhash_entry*
+new_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
+	socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm)
+{
+	struct infra_key* hostkey;
+	struct infra_data* hostdata;
+
+	hostkey = (struct infra_key*)malloc(sizeof(*hostkey));
+	if(!hostkey)
+		return NULL;
+	hostdata = (struct infra_data*)malloc(sizeof(struct infra_data));
+	if(!hostdata) {
+		free(hostkey);
+		return NULL;
+	}
+	hostkey->zonename = memdup(name, namelen);
+	if(!hostkey->zonename) {
+		free(hostdata);
+		free(hostkey);
+		return NULL;
+	}
+	/* fill in the key */
+	hostkey->namelen = namelen;
+	hostkey->addrlen = addrlen;
+	memcpy(&hostkey->addr, addr, addrlen);
+	/* set up the embedded hash table entry */
+	lock_rw_init(&hostkey->entry.lock);
+	hostkey->entry.hash = hash_infra(addr, addrlen, name);
+	hostkey->entry.key = (void*)hostkey;
+	hostkey->entry.data = (void*)hostdata;
+	data_entry_init(infra, &hostkey->entry, tm);
+	return &hostkey->entry;
+}
+
+/*
+ * Look up the host entry for addr and zone nm, creating it if absent, and
+ * return the EDNS version, the EDNS-lame-known flag and the timeout to use.
+ * An expired entry is re-initialised under a write lock; when its old rto
+ * was at least USEFUL_SERVER_TOP_TIMEOUT, that rto value and the per-type
+ * timeout counters are carried over.
+ * When the timeout is at or above PROBE_MAXRTO and four times the
+ * rtt_notimeout() value does not exceed it, probedelay is set on the entry.
+ * Returns 0 only when a new entry could not be allocated, 1 otherwise.
+ */
+int
+infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
+ socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
+ int* edns_vs, uint8_t* edns_lame_known, int* to)
+{
+ /* first lookup is with a read lock (wr == 0) */
+ struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
+ nm, nmlen, 0);
+ struct infra_data* data;
+ /* set to 1 once e is held with a write lock */
+ int wr = 0;
+ if(e && ((struct infra_data*)e->data)->ttl < timenow) {
+ /* it expired, try to reuse existing entry */
+ /* remember the values that may have to survive the re-init */
+ int old = ((struct infra_data*)e->data)->rtt.rto;
+ uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
+ uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
+ uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
+ /* drop the read lock and look up again with a write lock; the
+ * entry may be gone by then, in which case e becomes NULL and a
+ * new entry is created below */
+ lock_rw_unlock(&e->lock);
+ e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
+ if(e) {
+ /* if its still there we have a writelock, init */
+ /* re-initialise */
+ /* do not touch lameness, it may be valid still */
+ /* NOTE(review): data_entry_init as written also resets the
+ * lameness flags, which contradicts the comment above */
+ data_entry_init(infra, e, timenow);
+ wr = 1;
+ /* TOP_TIMEOUT remains on reuse */
+ if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
+ ((struct infra_data*)e->data)->rtt.rto
+ = USEFUL_SERVER_TOP_TIMEOUT;
+ ((struct infra_data*)e->data)->timeout_A = tA;
+ ((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
+ ((struct infra_data*)e->data)->timeout_other = tother;
+ }
+ }
+ }
+ if(!e) {
+ /* insert new entry */
+ if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
+ return 0;
+ data = (struct infra_data*)e->data;
+ /* read the output values before the entry is handed to the
+ * hash table */
+ *edns_vs = data->edns_version;
+ *edns_lame_known = data->edns_lame_known;
+ *to = rtt_timeout(&data->rtt);
+ slabhash_insert(infra->hosts, e->hash, e, data, NULL);
+ return 1;
+ }
+ /* use existing entry */
+ data = (struct infra_data*)e->data;
+ *edns_vs = data->edns_version;
+ *edns_lame_known = data->edns_lame_known;
+ *to = rtt_timeout(&data->rtt);
+ if(*to >= PROBE_MAXRTO && rtt_notimeout(&data->rtt)*4 <= *to) {
+ /* delay other queries, this is the probe query */
+ if(!wr) {
+ /* probedelay is written below, so swap the read lock for
+ * a write lock first */
+ lock_rw_unlock(&e->lock);
+ e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1);
+ if(!e) { /* flushed from cache real fast, no use to
+ allocate just for the probedelay */
+ return 1;
+ }
+ data = (struct infra_data*)e->data;
+ }
+ /* add 999 to round up the timeout value from msec to sec,
+ * then add a whole second so it is certain that this probe
+ * has timed out before the next is allowed */
+ data->probedelay = timenow + ((*to)+1999)/1000;
+ }
+ lock_rw_unlock(&e->lock);
+ return 1;
+}
+
+/*
+ * Mark the host at addr as lame for zone nm. The flag that is set depends
+ * on the arguments: dnsseclame sets isdnsseclame, reclame sets rec_lame,
+ * and if neither is given, lame_type_A (qtype A) or lame_other (any other
+ * qtype) is set. Flags are merged with what is already stored.
+ * A missing entry is created; an expired entry is re-initialised first.
+ * Returns 0 on malloc failure, 1 otherwise.
+ */
+int
+infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr,
+ socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
+ int dnsseclame, int reclame, uint16_t qtype)
+{
+ struct infra_data* data;
+ struct lruhash_entry* e;
+ /* true when e was freshly allocated and is not in the table yet */
+ int needtoinsert = 0;
+ /* wr == 1: the entry is modified below */
+ e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
+ if(!e) {
+ /* insert it */
+ if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) {
+ log_err("set_lame: malloc failure");
+ return 0;
+ }
+ needtoinsert = 1;
+ } else if( ((struct infra_data*)e->data)->ttl < timenow) {
+ /* expired, reuse existing entry */
+ data_entry_init(infra, e, timenow);
+ }
+ /* got an entry, now set the zone lame */
+ data = (struct infra_data*)e->data;
+ /* merge data (if any) */
+ if(dnsseclame)
+ data->isdnsseclame = 1;
+ if(reclame)
+ data->rec_lame = 1;
+ if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A)
+ data->lame_type_A = 1;
+ if(!dnsseclame && !reclame && qtype != LDNS_RR_TYPE_A)
+ data->lame_other = 1;
+ /* done */
+ /* a new entry is handed to the table; an existing one is unlocked */
+ if(needtoinsert)
+ slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
+ else { lock_rw_unlock(&e->lock); }
+ return 1;
+}
+
+/*
+ * Note that a TCP transaction to the host worked: if the stored rto is
+ * at or above RTT_MAX_TIMEOUT it is lowered to RTT_MAX_TIMEOUT-1000.
+ * Nothing happens when the host has no entry in the cache.
+ */
+void
+infra_update_tcp_works(struct infra_cache* infra,
+	struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
+	size_t nmlen)
+{
+	struct infra_data* host;
+	struct lruhash_entry* entry = infra_lookup_nottl(infra, addr,
+		addrlen, nm, nmlen, 1);
+	if(!entry) {
+		/* doesn't exist */
+		return;
+	}
+	host = (struct infra_data*)entry->data;
+	if(host->rtt.rto >= RTT_MAX_TIMEOUT) {
+		/* do not disqualify this server altogether, it is better
+		 * than nothing */
+		host->rtt.rto = RTT_MAX_TIMEOUT-1000;
+	}
+	lock_rw_unlock(&entry->lock);
+}
+
+/*
+ * Record the outcome of a query to the host. roundtrip == -1 means the
+ * query timed out: the rtt is updated with rtt_lost() and the timeout
+ * counter for the query type is incremented, up to TIMEOUT_COUNT_MAX.
+ * Otherwise the roundtrip time is fed to rtt_update(), probedelay is
+ * cleared and the timeout counter for the query type is reset.
+ * A missing entry is created; an expired entry is re-initialised first.
+ * Returns 0 on malloc failure, otherwise the new rto, or 1 if the stored
+ * rto is not positive.
+ */
+int
+infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
+ socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
+ int roundtrip, int orig_rtt, time_t timenow)
+{
+ struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
+ nm, nmlen, 1);
+ struct infra_data* data;
+ /* true when e was freshly allocated and is not in the table yet */
+ int needtoinsert = 0;
+ /* default return value; 0 is reserved for the error return */
+ int rto = 1;
+ if(!e) {
+ if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
+ return 0;
+ needtoinsert = 1;
+ } else if(((struct infra_data*)e->data)->ttl < timenow) {
+ /* expired, start over with initial values */
+ data_entry_init(infra, e, timenow);
+ }
+ /* have an entry, update the rtt */
+ data = (struct infra_data*)e->data;
+ if(roundtrip == -1) {
+ /* timeout */
+ rtt_lost(&data->rtt, orig_rtt);
+ if(qtype == LDNS_RR_TYPE_A) {
+ if(data->timeout_A < TIMEOUT_COUNT_MAX)
+ data->timeout_A++;
+ } else if(qtype == LDNS_RR_TYPE_AAAA) {
+ if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
+ data->timeout_AAAA++;
+ } else {
+ if(data->timeout_other < TIMEOUT_COUNT_MAX)
+ data->timeout_other++;
+ }
+ } else {
+ /* if we got a reply, but the old timeout was above server
+ * selection height, delete the timeout so the server is
+ * fully available again */
+ if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT)
+ rtt_init(&data->rtt);
+ rtt_update(&data->rtt, roundtrip);
+ data->probedelay = 0;
+ if(qtype == LDNS_RR_TYPE_A)
+ data->timeout_A = 0;
+ else if(qtype == LDNS_RR_TYPE_AAAA)
+ data->timeout_AAAA = 0;
+ else data->timeout_other = 0;
+ }
+ /* read the result before the entry is inserted or unlocked */
+ if(data->rtt.rto > 0)
+ rto = data->rtt.rto;
+
+ if(needtoinsert)
+ slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
+ else { lock_rw_unlock(&e->lock); }
+ return rto;
+}
+
+/*
+ * Read rtt and timeout information for a host, without modifying it.
+ * Returns -1 if the host is not in the cache (no outputs written),
+ * -2 if the entry has expired (only tA, tAAAA and tother are written),
+ * otherwise the remaining ttl in seconds, with rtt, delay and the three
+ * timeout counters filled in. delay is the number of seconds until
+ * probedelay is reached, or 0.
+ */
+long long infra_get_host_rto(struct infra_cache* infra,
+ struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
+ size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow,
+ int* tA, int* tAAAA, int* tother)
+{
+ /* wr == 0: the entry is only read */
+ struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
+ nm, nmlen, 0);
+ struct infra_data* data;
+ /* -2 is returned for an expired entry */
+ long long ttl = -2;
+ if(!e) return -1;
+ data = (struct infra_data*)e->data;
+ if(data->ttl >= timenow) {
+ ttl = (long long)(data->ttl - timenow);
+ memmove(rtt, &data->rtt, sizeof(*rtt));
+ if(timenow < data->probedelay)
+ *delay = (int)(data->probedelay - timenow);
+ else *delay = 0;
+ }
+ /* the counters are reported even for an expired entry */
+ *tA = (int)data->timeout_A;
+ *tAAAA = (int)data->timeout_AAAA;
+ *tother = (int)data->timeout_other;
+ lock_rw_unlock(&e->lock);
+ return ttl;
+}
+
+/*
+ * Store the EDNS version of a host and mark its EDNS lameness as known.
+ * A "no EDNS" update (edns_version == -1) is ignored when the entry
+ * already has a known EDNS version other than -1.
+ * A missing entry is created; an expired entry is re-initialised first.
+ * Returns 0 on malloc failure, 1 otherwise.
+ */
+int
+infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr,
+ socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version,
+ time_t timenow)
+{
+ struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
+ nm, nmlen, 1);
+ struct infra_data* data;
+ /* true when e was freshly allocated and is not in the table yet */
+ int needtoinsert = 0;
+ if(!e) {
+ if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
+ return 0;
+ needtoinsert = 1;
+ } else if(((struct infra_data*)e->data)->ttl < timenow) {
+ /* expired, start over with initial values */
+ data_entry_init(infra, e, timenow);
+ }
+ /* have an entry, update the edns information */
+ data = (struct infra_data*)e->data;
+ /* do not update if noEDNS and stored is yesEDNS */
+ if(!(edns_version == -1 && (data->edns_version != -1 &&
+ data->edns_lame_known))) {
+ data->edns_version = edns_version;
+ data->edns_lame_known = 1;
+ }
+
+ if(needtoinsert)
+ slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
+ else { lock_rw_unlock(&e->lock); }
+ return 1;
+}
+
+/*
+ * Get lameness and rtt information for a host and query type.
+ * Returns 0 if the host is not in the cache, or if its entry has expired
+ * and its rto is below USEFUL_SERVER_TOP_TIMEOUT; lame, dnsseclame and
+ * reclame are not written in that case (rtt is written for an expired
+ * entry). Returns 1 otherwise, with at most one of lame, dnsseclame and
+ * reclame set to 1; they are checked in that order.
+ * While a probe is outstanding (timenow before probedelay), rtt is
+ * reported as USEFUL_SERVER_TOP_TIMEOUT when the timeout counter for this
+ * query type has reached TIMEOUT_COUNT_MAX, else as
+ * USEFUL_SERVER_TOP_TIMEOUT-1000.
+ */
+int
+infra_get_lame_rtt(struct infra_cache* infra,
+ struct sockaddr_storage* addr, socklen_t addrlen,
+ uint8_t* name, size_t namelen, uint16_t qtype,
+ int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow)
+{
+ struct infra_data* host;
+ /* wr == 0: the entry is only read */
+ struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
+ name, namelen, 0);
+ if(!e)
+ return 0;
+ host = (struct infra_data*)e->data;
+ *rtt = rtt_unclamped(&host->rtt);
+ if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
+ && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
+ /* single probe for this domain, and we are not probing */
+ /* unless the query type allows a probe to happen */
+ if(qtype == LDNS_RR_TYPE_A) {
+ if(host->timeout_A >= TIMEOUT_COUNT_MAX)
+ *rtt = USEFUL_SERVER_TOP_TIMEOUT;
+ else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
+ } else if(qtype == LDNS_RR_TYPE_AAAA) {
+ if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
+ *rtt = USEFUL_SERVER_TOP_TIMEOUT;
+ else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
+ } else {
+ if(host->timeout_other >= TIMEOUT_COUNT_MAX)
+ *rtt = USEFUL_SERVER_TOP_TIMEOUT;
+ else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
+ }
+ }
+ if(timenow > host->ttl) {
+ /* expired entry */
+ /* see if this can be a re-probe of an unresponsive server */
+ /* minus 1000 because that is outside of the RTTBAND, so
+ * blacklisted servers stay blacklisted if this is chosen */
+ if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) {
+ lock_rw_unlock(&e->lock);
+ *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
+ *lame = 0;
+ *dnsseclame = 0;
+ *reclame = 0;
+ return 1;
+ }
+ /* expired and not blacklisted: reported as not in cache */
+ lock_rw_unlock(&e->lock);
+ return 0;
+ }
+ /* check lameness first */
+ if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) {
+ lock_rw_unlock(&e->lock);
+ *lame = 1;
+ *dnsseclame = 0;
+ *reclame = 0;
+ return 1;
+ } else if(host->lame_other && qtype != LDNS_RR_TYPE_A) {
+ lock_rw_unlock(&e->lock);
+ *lame = 1;
+ *dnsseclame = 0;
+ *reclame = 0;
+ return 1;
+ } else if(host->isdnsseclame) {
+ lock_rw_unlock(&e->lock);
+ *lame = 0;
+ *dnsseclame = 1;
+ *reclame = 0;
+ return 1;
+ } else if(host->rec_lame) {
+ lock_rw_unlock(&e->lock);
+ *lame = 0;
+ *dnsseclame = 0;
+ *reclame = 1;
+ return 1;
+ }
+ /* no lameness for this type of query */
+ lock_rw_unlock(&e->lock);
+ *lame = 0;
+ *dnsseclame = 0;
+ *reclame = 0;
+ return 1;
+}
+
+/* Memory in use by the infra cache: the struct plus its host table. */
+size_t
+infra_get_mem(struct infra_cache* infra)
+{
+	size_t hostmem = slabhash_get_mem(infra->hosts);
+	return sizeof(*infra) + hostmem;
+}
diff --git a/external/unbound/services/cache/infra.h b/external/unbound/services/cache/infra.h
new file mode 100644
index 000000000..fc54f7f0d
--- /dev/null
+++ b/external/unbound/services/cache/infra.h
@@ -0,0 +1,309 @@
+/*
+ * services/cache/infra.h - infrastructure cache, server rtt and capabilities
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains the infrastructure cache.
+ */
+
+#ifndef SERVICES_CACHE_INFRA_H
+#define SERVICES_CACHE_INFRA_H
+#include "util/storage/lruhash.h"
+#include "util/rtt.h"
+struct slabhash;
+struct config_file;
+
+/**
+ * Host information kept for every server, per zone.
+ * This is the hash table key: entries are identified by the combination
+ * of host address and zone name.
+ */
+struct infra_key {
+ /** the host address. */
+ struct sockaddr_storage addr;
+ /** length of addr. */
+ socklen_t addrlen;
+ /** zone name in wireformat. In keys stored in the cache this is a
+ * malloced copy that is freed together with the key. */
+ uint8_t* zonename;
+ /** length of zonename */
+ size_t namelen;
+ /** hash table entry, data of type infra_data. */
+ struct lruhash_entry entry;
+};
+
+/**
+ * Host information encompasses host capabilities and retransmission timeouts.
+ * And lameness information (notAuthoritative, noEDNS, Recursive)
+ */
+struct infra_data {
+ /** TTL value for this entry. absolute time. */
+ time_t ttl;
+
+ /** time in seconds (absolute) when probing re-commences, 0 disabled */
+ time_t probedelay;
+ /** round trip times for timeout calculation */
+ struct rtt_info rtt;
+
+ /** edns version that the host supports, -1 means no EDNS */
+ int edns_version;
+ /** if the EDNS lameness is already known or not.
+ * EDNS lame is when EDNS queries or replies are dropped,
+ * and cause a timeout */
+ uint8_t edns_lame_known;
+
+ /** is the host dnssec lame (does not serve DNSSEC data).
+ * Plain lameness (not authoritative for the zone) is kept in
+ * lame_type_A and lame_other below. */
+ uint8_t isdnsseclame;
+ /** is the host recursion lame (not AA, but RA) */
+ uint8_t rec_lame;
+ /** the host is lame (not authoritative) for A records */
+ uint8_t lame_type_A;
+ /** the host is lame (not authoritative) for other query types */
+ uint8_t lame_other;
+
+ /** timeouts counter for type A */
+ uint8_t timeout_A;
+ /** timeouts counter for type AAAA */
+ uint8_t timeout_AAAA;
+ /** timeouts counter for others (query types other than A and AAAA) */
+ uint8_t timeout_other;
+};
+
+/**
+ * Infra cache
+ */
+struct infra_cache {
+ /** The hash table with hosts; keys are infra_key, data is infra_data */
+ struct slabhash* hosts;
+ /** TTL value for host information, in seconds. Added to the current
+ * time to get the absolute ttl of an entry when it is initialised. */
+ int host_ttl;
+};
+
+/** infra host cache default hash lookup size */
+#define INFRA_HOST_STARTSIZE 32
+/** bytes per zonename reserved in the hostcache, dnamelen(zonename.com.) */
+#define INFRA_BYTES_NAME 14
+
+/**
+ * Create infra cache.
+ * @param cfg: config parameters. Must not be NULL, it is used unchecked.
+ * @return: new infra cache, or NULL.
+ */
+struct infra_cache* infra_create(struct config_file* cfg);
+
+/**
+ * Delete infra cache.
+ * @param infra: infrastructure cache to delete.
+ */
+void infra_delete(struct infra_cache* infra);
+
+/**
+ * Adjust infra cache to use updated configuration settings.
+ * This may clean the cache. Operates a bit like realloc.
+ * There may be no threading or use by other threads.
+ * @param infra: existing cache. If NULL a new infra cache is returned.
+ * @param cfg: config options.
+ * @return the new infra cache pointer or NULL on error.
+ */
+struct infra_cache* infra_adjust(struct infra_cache* infra,
+ struct config_file* cfg);
+
+/**
+ * Plain find infra data function (used by the other functions)
+ * @param infra: infrastructure cache.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param name: domain name of zone.
+ * @param namelen: length of domain name.
+ * @param wr: if true, writelock, else readlock.
+ * @return the entry, could be expired (this is not checked) or NULL.
+ */
+struct lruhash_entry* infra_lookup_nottl(struct infra_cache* infra,
+ struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name,
+ size_t namelen, int wr);
+
+/**
+ * Find host information to send a packet. Creates new entry if not found.
+ * Lameness is empty. EDNS is 0 (try with first), and rtt is returned for
+ * the first message to it.
+ * Use this to send a packet only, because it also locks out others when
+ * probing is restricted.
+ * @param infra: infrastructure cache.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param name: domain name of zone.
+ * @param namelen: length of domain name.
+ * @param timenow: what time it is now.
+ * @param edns_vs: edns version it supports, is returned.
+ * @param edns_lame_known: if EDNS lame (EDNS is dropped in transit) has
+ * already been probed, is returned.
+ * @param to: timeout to use, is returned.
+ * @return: 0 on error.
+ */
+int infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
+ socklen_t addrlen, uint8_t* name, size_t namelen,
+ time_t timenow, int* edns_vs, uint8_t* edns_lame_known, int* to);
+
+/**
+ * Set a host to be lame for the given zone.
+ * @param infra: infrastructure cache.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param name: domain name of zone apex.
+ * @param namelen: length of domain name.
+ * @param timenow: what time it is now.
+ * @param dnsseclame: if true the host is set dnssec lame.
+ * if false, the host is marked lame (not serving the zone).
+ * @param reclame: if true host is a recursor not AA server.
+ * if false, dnsseclame or marked lame.
+ * @param qtype: the query type for which it is lame.
+ * @return: 0 on error.
+ */
+int infra_set_lame(struct infra_cache* infra,
+ struct sockaddr_storage* addr, socklen_t addrlen,
+ uint8_t* name, size_t namelen, time_t timenow, int dnsseclame,
+ int reclame, uint16_t qtype);
+
+/**
+ * Update rtt information for the host.
+ * @param infra: infrastructure cache.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param name: zone name
+ * @param namelen: zone name length
+ * @param qtype: query type.
+ * @param roundtrip: estimate of roundtrip time in milliseconds or -1 for
+ * timeout.
+ * @param orig_rtt: original rtt for the query that timed out (roundtrip==-1).
+ * ignored if roundtrip != -1.
+ * @param timenow: what time it is now.
+ * @return: 0 on error. new rto otherwise.
+ */
+int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
+ socklen_t addrlen, uint8_t* name, size_t namelen, int qtype,
+ int roundtrip, int orig_rtt, time_t timenow);
+
+/**
+ * Update information for the host, store that a TCP transaction works.
+ * @param infra: infrastructure cache.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param name: name of zone
+ * @param namelen: length of name
+ */
+void infra_update_tcp_works(struct infra_cache* infra,
+ struct sockaddr_storage* addr, socklen_t addrlen,
+ uint8_t* name, size_t namelen);
+
+/**
+ * Update edns information for the host.
+ * @param infra: infrastructure cache.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param name: name of zone
+ * @param namelen: length of name
+ * @param edns_version: the version that it publishes.
+ * If it is known to support EDNS then no-EDNS is not stored over it.
+ * @param timenow: what time it is now.
+ * @return: 0 on error.
+ */
+int infra_edns_update(struct infra_cache* infra,
+ struct sockaddr_storage* addr, socklen_t addrlen,
+ uint8_t* name, size_t namelen, int edns_version, time_t timenow);
+
+/**
+ * Get Lameness information and average RTT if host is in the cache.
+ * This information is to be used for server selection.
+ * @param infra: infrastructure cache.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param name: zone name.
+ * @param namelen: zone name length.
+ * @param qtype: the query to be made.
+ * @param lame: if function returns true, this returns lameness of the zone.
+ * @param dnsseclame: if function returns true, this returns if the zone
+ * is dnssec-lame.
+ * @param reclame: if function returns true, this is if it is recursion lame.
+ * @param rtt: if function returns true, this returns avg rtt of the server.
+ * The rtt value is unclamped and reflects recent timeouts.
+ * @param timenow: what time it is now.
+ * @return if found in cache, or false if not (or TTL bad).
+ */
+int infra_get_lame_rtt(struct infra_cache* infra,
+ struct sockaddr_storage* addr, socklen_t addrlen,
+ uint8_t* name, size_t namelen, uint16_t qtype,
+ int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow);
+
+/**
+ * Get additional (debug) info on timing.
+ * @param infra: infra cache.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param name: zone name
+ * @param namelen: zone name length
+ * @param rtt: the rtt_info is copied into here (caller alloced return struct).
+ * @param delay: probe delay (if any).
+ * @param timenow: what time it is now.
+ * @param tA: timeout counter on type A.
+ * @param tAAAA: timeout counter on type AAAA.
+ * @param tother: timeout counter on type other.
+ * @return TTL the infra host element is valid for. If -1: not found in cache.
+ * TTL -2: found but expired.
+ */
+long long infra_get_host_rto(struct infra_cache* infra,
+ struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name,
+ size_t namelen, struct rtt_info* rtt, int* delay, time_t timenow,
+ int* tA, int* tAAAA, int* tother);
+
+/**
+ * Get memory used by the infra cache.
+ * @param infra: infrastructure cache.
+ * @return memory in use in bytes.
+ */
+size_t infra_get_mem(struct infra_cache* infra);
+
+/** calculate size for the hashtable, does not count size of lameness,
+ * so the hashtable is a fixed number of items */
+size_t infra_sizefunc(void* k, void* d);
+
+/** compare two addresses, returns -1, 0, or +1 */
+int infra_compfunc(void* key1, void* key2);
+
+/** delete key, and destroy the lock */
+void infra_delkeyfunc(void* k, void* arg);
+
+/** delete data and destroy the lameness hashtable */
+void infra_deldatafunc(void* d, void* arg);
+
+#endif /* SERVICES_CACHE_INFRA_H */
diff --git a/external/unbound/services/cache/rrset.c b/external/unbound/services/cache/rrset.c
new file mode 100644
index 000000000..5f52dbce1
--- /dev/null
+++ b/external/unbound/services/cache/rrset.c
@@ -0,0 +1,417 @@
+/*
+ * services/cache/rrset.c - Resource record set cache.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains the rrset cache.
+ */
+#include "config.h"
+#include "services/cache/rrset.h"
+#include "ldns/rrdef.h"
+#include "util/storage/slabhash.h"
+#include "util/config_file.h"
+#include "util/data/packed_rrset.h"
+#include "util/data/msgreply.h"
+#include "util/regional.h"
+#include "util/alloc.h"
+
+/**
+ * Mark an rrset key as deleted by resetting its id to 0.
+ * Installed on the rrset slabhash by rrset_cache_create().
+ * @param key: the struct ub_packed_rrset_key to mark.
+ */
+void
+rrset_markdel(void* key)
+{
+	((struct ub_packed_rrset_key*)key)->id = 0;
+}
+
+/**
+ * Create the rrset cache on top of a slabhash.
+ * The number of slabs and the memory limit are taken from cfg; the
+ * HASH_DEFAULT_* values are used when cfg is NULL.
+ * @param cfg: config settings or NULL for defaults.
+ * @param alloc: handed to slabhash_create() as its last argument.
+ * @return the new cache, or NULL if the slabhash could not be created.
+ */
+struct rrset_cache* rrset_cache_create(struct config_file* cfg,
+	struct alloc_cache* alloc)
+{
+	size_t slabs = (cfg?cfg->rrset_cache_slabs:HASH_DEFAULT_SLABS);
+	size_t startarray = HASH_DEFAULT_STARTARRAY;
+	size_t maxmem = (cfg?cfg->rrset_cache_size:HASH_DEFAULT_MAXMEM);
+
+	/* the slabhash is the first member of the cache struct, so the
+	 * pointer returned for the table doubles as the cache pointer */
+	struct rrset_cache *r = (struct rrset_cache*)slabhash_create(slabs,
+		startarray, maxmem, ub_rrset_sizefunc, ub_rrset_compare,
+		ub_rrset_key_delete, rrset_data_delete, alloc);
+	if(!r)
+		return NULL; /* creation failed, there is no table to set up */
+	slabhash_setmarkdel(&r->table, &rrset_markdel);
+	return r;
+}
+
+/**
+ * Delete the rrset cache. A NULL cache is ignored.
+ * @param r: rrset cache to delete.
+ */
+void rrset_cache_delete(struct rrset_cache* r)
+{
+	if(r) {
+		/* table is the first member of the struct, so deleting the
+		 * slabhash also does free(r) */
+		slabhash_delete(&r->table);
+	}
+}
+
+/**
+ * Make the cache match the configuration, recreating it when needed.
+ * The existing cache is returned untouched when its slab count and size
+ * already equal the configured values; in every other case (including a
+ * NULL cache or NULL cfg) the old cache is deleted and a new one created.
+ * @return the cache to use from now on, as returned by rrset_cache_create
+ *	when the cache was recreated.
+ */
+struct rrset_cache* rrset_cache_adjust(struct rrset_cache *r,
+	struct config_file* cfg, struct alloc_cache* alloc)
+{
+	/* nothing to do when the current cache already fits the config */
+	if(r && cfg && cfg->rrset_cache_slabs == r->table.size &&
+		cfg->rrset_cache_size == slabhash_get_size(&r->table))
+		return r;
+	rrset_cache_delete(r);
+	return rrset_cache_create(cfg, alloc);
+}
+
+/**
+ * Mark an rrset as recently used in the LRU of the slab selected by hash.
+ * The touch is skipped when the key no longer carries the given id and
+ * hash. The slab table lock is taken first and the entry read lock second;
+ * both are released before returning.
+ */
+void
+rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key,
+ hashvalue_t hash, rrset_id_t id)
+{
+ struct lruhash* table = slabhash_gettable(&r->table, hash);
+ /*
+ * This leads to locking problems, deadlocks, if the caller is
+ * holding any other rrset lock.
+ * Because a lookup through the hashtable does:
+ * tablelock -> entrylock (for that entry caller holds)
+ * And this would do
+ * entrylock(already held) -> tablelock
+ * And if two threads do this, it results in deadlock.
+ * So, the caller must not hold entrylock.
+ */
+ lock_quick_lock(&table->lock);
+ /* we have locked the hash table, the item can still be deleted.
+ * because it could already have been reclaimed, but not yet set id=0.
+ * This is because some lruhash routines have lazy deletion.
+ * so, we must acquire a lock on the item to verify the id != 0.
+ * also, with hash not changed, we are using the right slab.
+ */
+ lock_rw_rdlock(&key->entry.lock);
+ if(key->id == id && key->entry.hash == hash) {
+ lru_touch(table, &key->entry);
+ }
+ lock_rw_unlock(&key->entry.lock);
+ lock_quick_unlock(&table->lock);
+}
+
+/**
+ * Decide whether a new rrset must replace the rrset in the cache.
+ * @param nd: packed_rrset_data of the new rrset. For type NS with equal
+ *	trust but different data, its TTLs are capped to the cached TTL.
+ * @param cd: packed_rrset_data of the cached rrset.
+ * @param timenow: current time, compared with the cached TTL.
+ * @param equal: true if both rrsets have equal rdata.
+ * @param ns: true if the rrset is of type NS.
+ * @return 1 if the cache must be updated, 0 to keep the cached rrset.
+ */
+static int
+need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
+{
+	struct packed_rrset_data* fresh = (struct packed_rrset_data*)nd;
+	struct packed_rrset_data* old = (struct packed_rrset_data*)cd;
+	int old_bogus = (old->security == sec_status_bogus);
+
+	/* validated data replaces anything that is not secure */
+	if(fresh->security == sec_status_secure &&
+		old->security != sec_status_secure)
+		return 1;
+	/* different data that is not bogus replaces bogus data */
+	if(old_bogus && fresh->security != sec_status_bogus && !equal)
+		return 1;
+	if(fresh->trust > old->trust) {
+		/* more trustworthy data is stored, except that an equal,
+		 * unexpired bogus rrset is left alone so that its TTL is
+		 * not refreshed and it can expire */
+		return !(equal && old->ttl >= timenow && old_bogus);
+	}
+	/* the cached item has expired */
+	if(old->ttl < timenow)
+		return 1;
+	/* what remains only updates on same trust with different data */
+	if(fresh->trust != old->trust || equal)
+		return 0;
+	if(ns) {
+		/* for type NS do not 'stick' to an owner that keeps changing
+		 * the NS RRset: the new data gets the old TTL (which is not
+		 * expired, that was checked above) */
+		size_t total = fresh->count + fresh->rrsig_count;
+		size_t i;
+		fresh->ttl = old->ttl;
+		for(i=0; i<total; i++) {
+			if(fresh->rr_ttl[i] > fresh->ttl)
+				fresh->rr_ttl[i] = fresh->ttl;
+		}
+	}
+	return 1;
+}
+
+/**
+ * Update RRSet special key ID.
+ * Gives the referenced key a fresh id obtained from the alloc cache and
+ * stores the same id in the reference. Nothing is changed when the key id
+ * no longer equals ref->id.
+ * @param ref: reference (key pointer and id) to the cached rrset.
+ * @param alloc: where the new id is obtained from.
+ */
+static void
+rrset_update_id(struct rrset_ref* ref, struct alloc_cache* alloc)
+{
+ /* this may clear the cache and invalidate lock below */
+ /* the new id is therefore fetched before the entry lock is taken */
+ uint64_t newid = alloc_get_id(alloc);
+ /* obtain writelock */
+ lock_rw_wrlock(&ref->key->entry.lock);
+ /* check if it was deleted in the meantime, if so, skip update */
+ if(ref->key->id == ref->id) {
+ ref->key->id = newid;
+ ref->id = newid;
+ }
+ lock_rw_unlock(&ref->key->entry.lock);
+}
+
+/**
+ * Store the rrset referenced by ref in the cache, unless
+ * need_to_update_rrset() decides that the cached rrset is kept.
+ * When an entry for the key already exists, ref is changed to refer to
+ * the key and id of that entry.
+ * @param r: the rrset cache.
+ * @param ref: reference to the new rrset; may be redirected, see above.
+ * @param alloc: used to delete the passed key when the cache is kept, and
+ *	passed on to the slabhash insert and to the id update.
+ * @param timenow: current time, for the TTL check on the cached rrset.
+ * @return 0 if no entry existed and the passed rrset was inserted,
+ *	1 if an entry existed (cached rrset kept with different rdata, or
+ *	  replaced by the passed rrset),
+ *	2 if the cached rrset was kept and its rdata equals the passed one.
+ */
+int
+rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref,
+ struct alloc_cache* alloc, time_t timenow)
+{
+ struct lruhash_entry* e;
+ struct ub_packed_rrset_key* k = ref->key;
+ hashvalue_t h = k->entry.hash;
+ uint16_t rrset_type = ntohs(k->rk.type);
+ int equal = 0;
+ log_assert(ref->id != 0 && k->id != 0);
+ /* looks up item with a readlock - no editing! */
+ if((e=slabhash_lookup(&r->table, h, k, 0)) != 0) {
+ /* return id and key as they will be used in the cache
+ * since the lruhash_insert, if item already exists, deallocs
+ * the passed key in favor of the already stored key.
+ * because of the small gap (see below) this key ptr and id
+ * may prove later to be already deleted, which is no problem
+ * as it only makes a cache miss.
+ */
+ ref->key = (struct ub_packed_rrset_key*)e->key;
+ ref->id = ref->key->id;
+ equal = rrsetdata_equal((struct packed_rrset_data*)k->entry.
+ data, (struct packed_rrset_data*)e->data);
+ if(!need_to_update_rrset(k->entry.data, e->data, timenow,
+ equal, (rrset_type==LDNS_RR_TYPE_NS))) {
+ /* cache is superior, return that value */
+ lock_rw_unlock(&e->lock);
+ ub_packed_rrset_parsedelete(k, alloc);
+ if(equal) return 2;
+ return 1;
+ }
+ lock_rw_unlock(&e->lock);
+ /* Go on and insert the passed item.
+ * small gap here, where entry is not locked.
+ * possibly entry is updated with something else.
+ * we then overwrite that with our data.
+ * this is just too bad, its cache anyway. */
+ /* use insert to update entry to manage lruhash
+ * cache size values nicely. */
+ }
+ log_assert(ref->key->id != 0);
+ slabhash_insert(&r->table, h, &k->entry, k->entry.data, alloc);
+ /* e is only tested here, not dereferenced: its lock was released
+ * above, it just records that an entry existed before the insert */
+ if(e) {
+ /* For NSEC, NSEC3, DNAME, when rdata is updated, update
+ * the ID number so that proofs in message cache are
+ * invalidated */
+ if((rrset_type == LDNS_RR_TYPE_NSEC
+ || rrset_type == LDNS_RR_TYPE_NSEC3
+ || rrset_type == LDNS_RR_TYPE_DNAME) && !equal) {
+ rrset_update_id(ref, alloc);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * Find an rrset in the cache that has not expired.
+ * A found entry is returned still locked by the slabhash lookup (wr selects
+ * the lock type); an expired entry is unlocked again and treated as not
+ * found.
+ * @param qtype: type in host order.
+ * @param qclass: class in host order.
+ * @return the locked rrset key, or NULL if not found or expired.
+ */
+struct ub_packed_rrset_key*
+rrset_cache_lookup(struct rrset_cache* r, uint8_t* qname, size_t qnamelen,
+	uint16_t qtype, uint16_t qclass, uint32_t flags, time_t timenow,
+	int wr)
+{
+	struct ub_packed_rrset_key key;
+	struct lruhash_entry* e;
+	struct packed_rrset_data* data;
+
+	/* temporary key on the stack, only used to search the table */
+	key.entry.key = &key;
+	key.entry.data = NULL;
+	key.rk.dname = qname;
+	key.rk.dname_len = qnamelen;
+	key.rk.type = htons(qtype);
+	key.rk.rrset_class = htons(qclass);
+	key.rk.flags = flags;
+	key.entry.hash = rrset_key_hash(&key.rk);
+
+	e = slabhash_lookup(&r->table, key.entry.hash, &key, wr);
+	if(!e)
+		return NULL;
+	/* check TTL */
+	data = (struct packed_rrset_data*)e->data;
+	if(timenow > data->ttl) {
+		lock_rw_unlock(&e->lock);
+		return NULL;
+	}
+	return (struct ub_packed_rrset_key*)e->key;
+}
+
+/**
+ * Read-lock every key in an array of rrset references.
+ * A key that equals the key of the previous array element is locked only
+ * once. When a key id differs from the reference id, or the rrset TTL has
+ * passed, all locks taken so far are released again.
+ * @return 1 with all items read-locked, 0 with no locks held.
+ */
+int
+rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow)
+{
+	size_t i;
+	for(i=0; i<count; i++) {
+		struct ub_packed_rrset_key* k = ref[i].key;
+		if(i>0 && k == ref[i-1].key)
+			continue; /* only lock items once */
+		lock_rw_rdlock(&k->entry.lock);
+		if(ref[i].id == k->id && timenow <=
+			((struct packed_rrset_data*)(k->entry.data))->ttl)
+			continue; /* this one is fine, on to the next */
+		/* failure! rollback our readlocks, including item i */
+		rrset_array_unlock(ref, i+1);
+		return 0;
+	}
+	return 1;
+}
+
+/**
+ * Unlock the keys in an array of rrset references.
+ * A key that equals the key of the previous array element is unlocked
+ * only once.
+ */
+void
+rrset_array_unlock(struct rrset_ref* ref, size_t count)
+{
+	size_t i;
+	for(i=0; i<count; i++) {
+		int duplicate = (i>0 && ref[i].key == ref[i-1].key);
+		if(!duplicate)
+			lock_rw_unlock(&ref[i].key->entry.lock);
+	}
+}
+
+/**
+ * Unlock an array of rrset references and then LRU-touch the rrsets.
+ * The hash values are copied into scratch memory while the locks are
+ * still held, because the touch has to happen with no rrset lock held.
+ * If the scratch allocation fails a warning is logged and the items are
+ * only unlocked.
+ */
+void
+rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch,
+	struct rrset_ref* ref, size_t count)
+{
+	size_t i;
+	hashvalue_t* hashes = (hashvalue_t*)regional_alloc(scratch,
+		sizeof(hashvalue_t)*count);
+	if(hashes) {
+		/* store hash values */
+		for(i=0; i<count; i++)
+			hashes[i] = ref[i].key->entry.hash;
+	} else {
+		log_warn("rrset LRU: memory allocation failed");
+	}
+	/* unlock, every distinct key once */
+	rrset_array_unlock(ref, count);
+	if(!hashes)
+		return;
+	/* LRU touch, with no rrset locks held */
+	for(i=0; i<count; i++) {
+		if(i>0 && ref[i].key == ref[i-1].key)
+			continue; /* only touch items once */
+		rrset_cache_touch(r, ref[i].key, hashes[i], ref[i].id);
+	}
+}
+
+/**
+ * Copy a higher security status from rrset into the cached rrset.
+ * The cached entry is looked up with a write lock and is only changed
+ * when its rdata equals that of the passed rrset and the passed security
+ * value is higher. Trust is raised if the passed trust is higher. The
+ * TTLs are rewritten as passed TTL + now, except for type NS where that
+ * only happens if it shortens the TTL, the cached TTL has expired, or the
+ * new status is bogus.
+ */
+void
+rrset_update_sec_status(struct rrset_cache* r,
+	struct ub_packed_rrset_key* rrset, time_t now)
+{
+	struct packed_rrset_data* updata =
+		(struct packed_rrset_data*)rrset->entry.data;
+	struct packed_rrset_data* cachedata;
+	struct lruhash_entry* e;
+	size_t i;
+
+	/* hash it again to make sure it has a hash */
+	rrset->entry.hash = rrset_key_hash(&rrset->rk);
+
+	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 1);
+	if(!e)
+		return; /* not in the cache anymore */
+	cachedata = (struct packed_rrset_data*)e->data;
+	/* leave the cache alone if the rrset has changed in the meantime,
+	 * or if the passed status is no improvement */
+	if(rrsetdata_equal(updata, cachedata) &&
+		updata->security > cachedata->security) {
+		if(updata->trust > cachedata->trust)
+			cachedata->trust = updata->trust;
+		cachedata->security = updata->security;
+		/* for NS records only shorter TTLs, other types: update it */
+		if(ntohs(rrset->rk.type) != LDNS_RR_TYPE_NS ||
+			updata->ttl+now < cachedata->ttl ||
+			cachedata->ttl < now ||
+			updata->security == sec_status_bogus) {
+			cachedata->ttl = updata->ttl + now;
+			for(i=0; i<cachedata->count+cachedata->rrsig_count;
+				i++) {
+				cachedata->rr_ttl[i] = updata->rr_ttl[i]+now;
+			}
+		}
+	}
+	lock_rw_unlock(&e->lock);
+}
+
+/**
+ * Copy a higher security status from the cached rrset into rrset.
+ * The cached entry is looked up with a read lock and is only used when it
+ * has not expired and its rdata equals that of the passed rrset. If the
+ * cached security value is higher it is copied, together with a higher
+ * trust value. When the cached status is bogus the remaining cached TTLs
+ * (relative to now) are copied as well.
+ */
+void
+rrset_check_sec_status(struct rrset_cache* r,
+	struct ub_packed_rrset_key* rrset, time_t now)
+{
+	struct packed_rrset_data* updata =
+		(struct packed_rrset_data*)rrset->entry.data;
+	struct packed_rrset_data* cachedata;
+	struct lruhash_entry* e;
+
+	/* hash it again to make sure it has a hash */
+	rrset->entry.hash = rrset_key_hash(&rrset->rk);
+
+	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 0);
+	if(!e)
+		return; /* not in the cache anymore */
+	cachedata = (struct packed_rrset_data*)e->data;
+	/* skip if expired, if the rrset has changed in the meantime, or if
+	 * the cache has nothing better to offer */
+	if(now <= cachedata->ttl && rrsetdata_equal(updata, cachedata) &&
+		cachedata->security > updata->security) {
+		updata->security = cachedata->security;
+		if(cachedata->security == sec_status_bogus) {
+			size_t total = cachedata->count +
+				cachedata->rrsig_count;
+			size_t i;
+			updata->ttl = cachedata->ttl - now;
+			for(i=0; i<total; i++) {
+				/* an rr that is already past its TTL gets 0 */
+				if(cachedata->rr_ttl[i] < now)
+					updata->rr_ttl[i] = 0;
+				else
+					updata->rr_ttl[i] =
+						cachedata->rr_ttl[i]-now;
+			}
+		}
+		if(cachedata->trust > updata->trust)
+			updata->trust = cachedata->trust;
+	}
+	lock_rw_unlock(&e->lock);
+}
+
+/**
+ * Remove the rrset with the given name, type, class and flags from the
+ * cache.
+ * @param type: rrset type in host order (converted with htons here).
+ * @param dclass: rrset class in host order (converted with htons here).
+ */
+void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen,
+	uint16_t type, uint16_t dclass, uint32_t flags)
+{
+	struct ub_packed_rrset_key key;
+
+	/* temporary key on the stack that identifies the entry to remove */
+	key.rk.dname = nm;
+	key.rk.dname_len = nmlen;
+	key.rk.type = htons(type);
+	key.rk.rrset_class = htons(dclass);
+	key.rk.flags = flags;
+	key.entry.key = &key;
+	key.entry.hash = rrset_key_hash(&key.rk);
+	slabhash_remove(&r->table, key.entry.hash, &key);
+}
diff --git a/external/unbound/services/cache/rrset.h b/external/unbound/services/cache/rrset.h
new file mode 100644
index 000000000..98e44a4e5
--- /dev/null
+++ b/external/unbound/services/cache/rrset.h
@@ -0,0 +1,231 @@
+/*
+ * services/cache/rrset.h - Resource record set cache.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains the rrset cache.
+ */
+
+#ifndef SERVICES_CACHE_RRSET_H
+#define SERVICES_CACHE_RRSET_H
+#include "util/storage/lruhash.h"
+#include "util/storage/slabhash.h"
+#include "util/data/packed_rrset.h"
+struct config_file;
+struct alloc_cache;
+struct rrset_ref;
+struct regional;
+
+/**
+ * The rrset cache
+ * Thin wrapper around hashtable, like a typedef.
+ */
+struct rrset_cache {
+ /** uses partitioned hash table.
+ * Must stay the first member: rrset_cache_create() casts the pointer
+ * returned by slabhash_create() to this struct, and
+ * rrset_cache_delete() relies on slabhash_delete() of the table to
+ * free the struct as well. */
+ struct slabhash table;
+};
+
+/**
+ * Create rrset cache
+ * @param cfg: config settings or NULL for defaults.
+ * @param alloc: initial default rrset key allocation.
+ * @return: NULL on error.
+ */
+struct rrset_cache* rrset_cache_create(struct config_file* cfg,
+ struct alloc_cache* alloc);
+
+/**
+ * Delete rrset cache
+ * @param r: rrset cache to delete.
+ */
+void rrset_cache_delete(struct rrset_cache* r);
+
+/**
+ * Adjust settings of the cache to settings from the config file.
+ * May purge the cache. May recreate the cache.
+ * There may be no threading or use by other threads.
+ * @param r: rrset cache to adjust (like realloc).
+ * @param cfg: config settings or NULL for defaults.
+ * @param alloc: initial default rrset key allocation.
+ * @return 0 on error, or new rrset cache pointer on success.
+ */
+struct rrset_cache* rrset_cache_adjust(struct rrset_cache* r,
+ struct config_file* cfg, struct alloc_cache* alloc);
+
+/**
+ * Touch rrset, with given pointer and id.
+ * Caller may not hold a lock on ANY rrset, this could give deadlock.
+ *
+ * This routine is faster than a hashtable lookup:
+ * o no bin_lock is acquired.
+ * o no walk through the bin-overflow-list.
+ * o no comparison of the entry key to find it.
+ *
+ * @param r: rrset cache.
+ * @param key: rrset key. Marked recently used (if it was not deleted
+ * before the lock is acquired, in that case nothing happens).
+ * @param hash: hash value of the item. Please read it from the key when
+ * you have it locked. Used to find slab from slabhash.
+ * @param id: used to check that the item is unchanged and not deleted.
+ */
+void rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key,
+ hashvalue_t hash, rrset_id_t id);
+
+/**
+ * Update an rrset in the rrset cache. Stores the information for later use.
+ * Will lookup if the rrset is in the cache and perform an update if necessary.
+ * If the item was present, and superior, references are returned to that.
+ * The passed item is then deallocated with rrset_parsedelete.
+ *
+ * A superior rrset is:
+ * o rrset with better trust value.
+ * o same trust value, different rdata, newly passed rrset is inserted.
+ * If rdata is the same, TTL in the cache is updated.
+ *
+ * @param r: the rrset cache.
+ * @param ref: reference (ptr and id) to the rrset. Pass reference setup for
+ * the new rrset. The reference may be changed if the cached rrset is
+ * superior.
+ * Before calling the rrset is presumed newly allocated and changeable.
+ * After calling you do not hold a lock, and the rrset is inserted in
+ * the hashtable so you need a lock to change it.
+ * @param alloc: how to allocate (and deallocate) the special rrset key.
+ * @param timenow: current time (to see if ttl in cache is expired).
+ * @return: true if the passed reference is updated, false if it is unchanged.
+ * 0: reference unchanged, inserted in cache.
+ * 1: reference updated, item is inserted in cache.
+ * 2: reference updated, item in cache is considered superior.
+ * also the rdata is equal (but other parameters in cache are superior).
+ */
+int rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref,
+ struct alloc_cache* alloc, time_t timenow);
+
+/**
+ * Lookup rrset. You obtain read/write lock. You must unlock before looking
+ * up anything else.
+ * @param r: the rrset cache.
+ * @param qname: name of rrset to lookup.
+ * @param qnamelen: length of name of rrset to lookup.
+ * @param qtype: type of rrset to lookup (host order).
+ * @param qclass: class of rrset to lookup (host order).
+ * @param flags: rrset flags, or 0.
+ * @param timenow: used to compare with TTL.
+ * @param wr: set true to get writelock.
+ * @return packed rrset key pointer. Remember to unlock the key.entry.lock.
+ * or NULL if could not be found or it was timed out.
+ */
+struct ub_packed_rrset_key* rrset_cache_lookup(struct rrset_cache* r,
+ uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
+ uint32_t flags, time_t timenow, int wr);
+
+/**
+ * Obtain readlock on a (sorted) list of rrset references.
+ * Checks TTLs and IDs of the rrsets and rollbacks locking if not Ok.
+ * @param ref: array of rrset references (key pointer and ID value).
+ * duplicate references are allowed and handled.
+ * @param count: size of array.
+ * @param timenow: used to compare with TTL.
+ * @return true on success, false on a failure, which can be that some
+ * RRsets have timed out, or that they do not exist any more, the
+ * RRsets have been purged from the cache.
+ * If true, you hold readlocks on all the ref items.
+ */
+int rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow);
+
+/**
+ * Unlock array (sorted) of rrset references.
+ * @param ref: array of rrset references (key pointer and ID value).
+ * duplicate references are allowed and handled.
+ * @param count: size of array.
+ */
+void rrset_array_unlock(struct rrset_ref* ref, size_t count);
+
+/**
+ * Unlock array (sorted) of rrset references and at the same time
+ * touch LRU on the rrsets. It needs the scratch region for temporary
+ * storage as it uses the initial locks to obtain hash values.
+ * @param r: the rrset cache. In this cache LRU is updated.
+ * @param scratch: region for temporary storage of hash values.
+ * if memory allocation fails, the lru touch fails silently,
+ * but locks are released. memory errors are logged.
+ * @param ref: array of rrset references (key pointer and ID value).
+ * duplicate references are allowed and handled.
+ * @param count: size of array.
+ */
+void rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch,
+ struct rrset_ref* ref, size_t count);
+
+/**
+ * Update security status of an rrset. Looks up the rrset.
+ * If found, checks if rdata is equal.
+ * If so, it will update the security, trust and rrset-ttl values.
+ * The values are only updated if security is increased (towards secure).
+ * @param r: the rrset cache.
+ * @param rrset: which rrset to attempt to update. This rrset is left
+ * untouched. The rrset in the cache is updated in-place.
+ * @param now: current time.
+ */
+void rrset_update_sec_status(struct rrset_cache* r,
+ struct ub_packed_rrset_key* rrset, time_t now);
+
+/**
+ * Looks up security status of an rrset. Looks up the rrset.
+ * If found, checks if rdata is equal, and entry did not expire.
+ * If so, it will update the security, trust and rrset-ttl values.
+ * @param r: the rrset cache.
+ * @param rrset: This rrset may change security status due to the cache.
+ * But its status will only improve, towards secure.
+ * @param now: current time.
+ */
+void rrset_check_sec_status(struct rrset_cache* r,
+ struct ub_packed_rrset_key* rrset, time_t now);
+
+/**
+ * Remove an rrset from the cache, by name and type and flags
+ * @param r: rrset cache
+ * @param nm: name of rrset
+ * @param nmlen: length of name
+ * @param type: type of rrset, host order
+ * @param dclass: class of rrset, host order
+ * @param flags: flags of rrset, host order
+ */
+void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen,
+ uint16_t type, uint16_t dclass, uint32_t flags);
+
+/** mark rrset to be deleted, set id=0 */
+void rrset_markdel(void* key);
+
+#endif /* SERVICES_CACHE_RRSET_H */
diff --git a/external/unbound/services/listen_dnsport.c b/external/unbound/services/listen_dnsport.c
new file mode 100644
index 000000000..b7ffb6d3f
--- /dev/null
+++ b/external/unbound/services/listen_dnsport.c
@@ -0,0 +1,1062 @@
+/*
+ * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file has functions to get queries from clients.
+ */
+#include "config.h"
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#include <sys/time.h>
+#include "services/listen_dnsport.h"
+#include "services/outside_network.h"
+#include "util/netevent.h"
+#include "util/log.h"
+#include "util/config_file.h"
+#include "util/net_help.h"
+#include "ldns/sbuffer.h"
+
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+#include <fcntl.h>
+
+/** number of queued TCP connections for listen() */
+#define TCP_BACKLOG 256
+
+/**
+ * Log, at VERB_ALGO verbosity, which socket is about to be created for
+ * an address returned by getaddrinfo.
+ * @param addr: the address returned.
+ */
+static void
+verbose_print_addr(struct addrinfo *addr)
+{
+	char text[100];
+	const char* proto;
+	const char* fam;
+	void* rawaddr;
+
+	/* nothing to do unless algorithm-level logging is on */
+	if(verbosity < VERB_ALGO)
+		return;
+
+	rawaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
+#ifdef INET6
+	if(addr->ai_family == AF_INET6)
+		rawaddr = &((struct sockaddr_in6*)addr->ai_addr)->
+			sin6_addr;
+#endif /* INET6 */
+	/* fall back to a placeholder when the address cannot be printed */
+	if(inet_ntop(addr->ai_family, rawaddr, text,
+		(socklen_t)sizeof(text)) == 0)
+		(void)strlcpy(text, "(null)", sizeof(text));
+	text[sizeof(text)-1] = 0;
+
+	if(addr->ai_socktype == SOCK_DGRAM)
+		proto = "udp";
+	else if(addr->ai_socktype == SOCK_STREAM)
+		proto = "tcp";
+	else
+		proto = "otherproto";
+
+	if(addr->ai_family == AF_INET)
+		fam = "4";
+	else if(addr->ai_family == AF_INET6)
+		fam = "6";
+	else
+		fam = "_otherfam";
+
+	verbose(VERB_ALGO, "creating %s%s socket %s %d", proto, fam, text,
+		ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
+}
+
+/*
+ * Create a socket with socket(family, socktype, 0), apply the socket
+ * options selected by the arguments and by the platform macros, bind it
+ * to addr and make it nonblocking.
+ * On every failure path the socket (if it was created) is closed, *inuse
+ * and *noproto are assigned, and -1 is returned. *inuse is nonzero only
+ * when bind() failed with EADDRINUSE (non-Windows build). *noproto is 1
+ * when socket() reported an unsupported address family or protocol, or
+ * when an AF_INET6 bind() failed with EINVAL (non-Windows build).
+ * On success the descriptor is returned and *inuse and *noproto are not
+ * written.
+ */
+int
+create_udp_sock(int family, int socktype, struct sockaddr* addr,
+ socklen_t addrlen, int v6only, int* inuse, int* noproto,
+ int rcv, int snd, int listen, int* reuseport)
+{
+ int s;
+#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)
+ int on=1;
+#endif
+#ifdef IPV6_MTU
+ int mtu = IPV6_MIN_MTU;
+#endif
+ /* silence unused-parameter warnings when the options do not exist */
+#if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
+ (void)rcv;
+#endif
+#if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
+ (void)snd;
+#endif
+#ifndef IPV6_V6ONLY
+ (void)v6only;
+#endif
+ if((s = socket(family, socktype, 0)) == -1) {
+ *inuse = 0;
+#ifndef USE_WINSOCK
+ /* unsupported family/protocol is reported via noproto, not logged */
+ if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
+ *noproto = 1;
+ return -1;
+ }
+ log_err("can't create socket: %s", strerror(errno));
+#else
+ if(WSAGetLastError() == WSAEAFNOSUPPORT ||
+ WSAGetLastError() == WSAEPROTONOSUPPORT) {
+ *noproto = 1;
+ return -1;
+ }
+ log_err("can't create socket: %s",
+ wsa_strerror(WSAGetLastError()));
+#endif
+ *noproto = 0;
+ return -1;
+ }
+ /* the reuse options are only applied to listening sockets */
+ if(listen) {
+#ifdef SO_REUSEADDR
+ if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
+ (socklen_t)sizeof(on)) < 0) {
+#ifndef USE_WINSOCK
+ log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
+ strerror(errno));
+ /* ENOSYS is tolerated: continue without SO_REUSEADDR */
+ if(errno != ENOSYS) {
+ close(s);
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+#else
+ log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+#endif
+ }
+#endif /* SO_REUSEADDR */
+#ifdef SO_REUSEPORT
+ /* try to set SO_REUSEPORT so that incoming
+ * queries are distributed evenly among the receiving threads.
+ * Each thread must have its own socket bound to the same port,
+ * with SO_REUSEPORT set on each socket.
+ */
+ if (reuseport && *reuseport &&
+ setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
+ (socklen_t)sizeof(on)) < 0) {
+#ifdef ENOPROTOOPT
+ if(errno != ENOPROTOOPT || verbosity >= 3)
+ log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
+ strerror(errno));
+#endif
+ /* this option is not essential, we can continue */
+ *reuseport = 0;
+ }
+#else
+ (void)reuseport;
+#endif /* defined(SO_REUSEPORT) */
+ }
+ if(rcv) {
+#ifdef SO_RCVBUF
+ int got;
+ socklen_t slen = (socklen_t)sizeof(got);
+# ifdef SO_RCVBUFFORCE
+ /* Linux specific: try to use root permission to override
+ * system limits on rcvbuf. The limit is stored in
+ * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
+ if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
+ (socklen_t)sizeof(rcv)) < 0) {
+ /* EPERM falls through to the plain SO_RCVBUF attempt below;
+ * any other error is fatal for this socket */
+ if(errno != EPERM) {
+# ifndef USE_WINSOCK
+ log_err("setsockopt(..., SO_RCVBUFFORCE, "
+ "...) failed: %s", strerror(errno));
+ close(s);
+# else
+ log_err("setsockopt(..., SO_RCVBUFFORCE, "
+ "...) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+# endif
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+# endif /* SO_RCVBUFFORCE */
+ if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
+ (socklen_t)sizeof(rcv)) < 0) {
+# ifndef USE_WINSOCK
+ log_err("setsockopt(..., SO_RCVBUF, "
+ "...) failed: %s", strerror(errno));
+ close(s);
+# else
+ log_err("setsockopt(..., SO_RCVBUF, "
+ "...) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+# endif
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+ /* check if we got the right thing or if system
+ * reduced to some system max. Warn if so */
+ if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
+ &slen) >= 0 && got < rcv/2) {
+ log_warn("so-rcvbuf %u was not granted. "
+ "Got %u. To fix: start with "
+ "root permissions(linux) or sysctl "
+ "bigger net.core.rmem_max(linux) or "
+ "kern.ipc.maxsockbuf(bsd) values.",
+ (unsigned)rcv, (unsigned)got);
+ }
+# ifdef SO_RCVBUFFORCE
+ }
+# endif
+#endif /* SO_RCVBUF */
+ }
+ /* first do RCVBUF as the receive buffer is more important */
+ if(snd) {
+#ifdef SO_SNDBUF
+ int got;
+ socklen_t slen = (socklen_t)sizeof(got);
+# ifdef SO_SNDBUFFORCE
+ /* Linux specific: try to use root permission to override
+ * system limits on sndbuf. The limit is stored in
+ * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
+ if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
+ (socklen_t)sizeof(snd)) < 0) {
+ /* same fallback scheme as for the receive buffer above */
+ if(errno != EPERM) {
+# ifndef USE_WINSOCK
+ log_err("setsockopt(..., SO_SNDBUFFORCE, "
+ "...) failed: %s", strerror(errno));
+ close(s);
+# else
+ log_err("setsockopt(..., SO_SNDBUFFORCE, "
+ "...) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+# endif
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+# endif /* SO_SNDBUFFORCE */
+ if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
+ (socklen_t)sizeof(snd)) < 0) {
+# ifndef USE_WINSOCK
+ log_err("setsockopt(..., SO_SNDBUF, "
+ "...) failed: %s", strerror(errno));
+ close(s);
+# else
+ log_err("setsockopt(..., SO_SNDBUF, "
+ "...) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+# endif
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+ /* check if we got the right thing or if system
+ * reduced to some system max. Warn if so */
+ if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
+ &slen) >= 0 && got < snd/2) {
+ log_warn("so-sndbuf %u was not granted. "
+ "Got %u. To fix: start with "
+ "root permissions(linux) or sysctl "
+ "bigger net.core.wmem_max(linux) or "
+ "kern.ipc.maxsockbuf(bsd) values.",
+ (unsigned)snd, (unsigned)got);
+ }
+# ifdef SO_SNDBUFFORCE
+ }
+# endif
+#endif /* SO_SNDBUF */
+ }
+ if(family == AF_INET6) {
+# if defined(IPV6_V6ONLY)
+ if(v6only) {
+ /* v6only==2 clears IPV6_V6ONLY, any other nonzero value sets it */
+ int val=(v6only==2)?0:1;
+ if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
+ (void*)&val, (socklen_t)sizeof(val)) < 0) {
+#ifndef USE_WINSOCK
+ log_err("setsockopt(..., IPV6_V6ONLY"
+ ", ...) failed: %s", strerror(errno));
+ close(s);
+#else
+ log_err("setsockopt(..., IPV6_V6ONLY"
+ ", ...) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+#endif
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+ }
+# endif
+# if defined(IPV6_USE_MIN_MTU)
+ /*
+ * There is no fragmentation of IPv6 datagrams
+ * during forwarding in the network. Therefore
+ * we do not send UDP datagrams larger than
+ * the minimum IPv6 MTU of 1280 octets. The
+ * EDNS0 message length can be larger if the
+ * network stack supports IPV6_USE_MIN_MTU.
+ */
+ if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
+ (void*)&on, (socklen_t)sizeof(on)) < 0) {
+# ifndef USE_WINSOCK
+ log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
+ "...) failed: %s", strerror(errno));
+ close(s);
+# else
+ log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
+ "...) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+# endif
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+# elif defined(IPV6_MTU)
+ /*
+ * On Linux, to send no larger than 1280, the PMTUD is
+ * disabled by default for datagrams anyway, so we set
+ * the MTU to use.
+ */
+ if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
+ (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
+# ifndef USE_WINSOCK
+ log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
+ strerror(errno));
+ close(s);
+# else
+ log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+# endif
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+# endif /* IPv6 MTU */
+ } else if(family == AF_INET) {
+# if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
+/* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
+ * PMTU information is not accepted, but fragmentation is allowed
+ * if and only if the packet size exceeds the outgoing interface MTU
+ * (and also uses the interface mtu to determine the size of the packets).
+ * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks.
+ * FreeBSD already has same semantics without setting the option. */
+# if defined(IP_PMTUDISC_OMIT)
+ int action = IP_PMTUDISC_OMIT;
+# else
+ int action = IP_PMTUDISC_DONT;
+# endif
+ if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
+ &action, (socklen_t)sizeof(action)) < 0) {
+ log_err("setsockopt(..., IP_MTU_DISCOVER, "
+# if defined(IP_PMTUDISC_OMIT)
+ "IP_PMTUDISC_OMIT"
+# else
+ "IP_PMTUDISC_DONT"
+# endif
+ "...) failed: %s",
+ strerror(errno));
+# ifndef USE_WINSOCK
+ close(s);
+# else
+ closesocket(s);
+# endif
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+# elif defined(IP_DONTFRAG)
+ /* the option is set to 0 here, i.e. IP_DONTFRAG is switched off */
+ int off = 0;
+ if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
+ &off, (socklen_t)sizeof(off)) < 0) {
+ log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
+ strerror(errno));
+# ifndef USE_WINSOCK
+ close(s);
+# else
+ closesocket(s);
+# endif
+ *noproto = 0;
+ *inuse = 0;
+ return -1;
+ }
+# endif /* IPv4 MTU */
+ }
+ if(bind(s, (struct sockaddr*)addr, addrlen) != 0) {
+ *noproto = 0;
+ *inuse = 0;
+#ifndef USE_WINSOCK
+#ifdef EADDRINUSE
+ /* EADDRINUSE is not logged here, only reported through *inuse */
+ *inuse = (errno == EADDRINUSE);
+ /* detect freebsd jail with no ipv6 permission */
+ if(family==AF_INET6 && errno==EINVAL)
+ *noproto = 1;
+ else if(errno != EADDRINUSE) {
+ log_err_addr("can't bind socket", strerror(errno),
+ (struct sockaddr_storage*)addr, addrlen);
+ }
+#endif /* EADDRINUSE */
+ close(s);
+#else /* USE_WINSOCK */
+ if(WSAGetLastError() != WSAEADDRINUSE &&
+ WSAGetLastError() != WSAEADDRNOTAVAIL) {
+ log_err_addr("can't bind socket",
+ wsa_strerror(WSAGetLastError()),
+ (struct sockaddr_storage*)addr, addrlen);
+ }
+ closesocket(s);
+#endif
+ return -1;
+ }
+ if(!fd_set_nonblock(s)) {
+ *noproto = 0;
+ *inuse = 0;
+#ifndef USE_WINSOCK
+ close(s);
+#else
+ closesocket(s);
+#endif
+ return -1;
+ }
+ return s;
+}
+
+/*
+ * Create a listening stream socket for the getaddrinfo result addr.
+ * Steps, in order: socket(), SO_REUSEADDR, optional SO_REUSEPORT,
+ * optional IPV6_V6ONLY, bind(), nonblocking mode, listen() with
+ * TCP_BACKLOG.
+ * *noproto is cleared on entry and set to 1 only when socket() reports
+ * an unsupported family/protocol or (non-Windows build) an AF_INET6
+ * bind() fails with EINVAL.
+ * Returns the descriptor, or -1 on failure, in which case the socket
+ * has been closed again.
+ */
+int
+create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
+ int* reuseport)
+{
+ int s;
+#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY)
+ int on = 1;
+#endif /* SO_REUSEADDR || IPV6_V6ONLY */
+ verbose_print_addr(addr);
+ *noproto = 0;
+ if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
+#ifndef USE_WINSOCK
+ /* unsupported family/protocol is reported via noproto, not logged */
+ if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
+ *noproto = 1;
+ return -1;
+ }
+ log_err("can't create socket: %s", strerror(errno));
+#else
+ if(WSAGetLastError() == WSAEAFNOSUPPORT ||
+ WSAGetLastError() == WSAEPROTONOSUPPORT) {
+ *noproto = 1;
+ return -1;
+ }
+ log_err("can't create socket: %s",
+ wsa_strerror(WSAGetLastError()));
+#endif
+ return -1;
+ }
+#ifdef SO_REUSEADDR
+ /* unlike the UDP variant, any SO_REUSEADDR failure is fatal here */
+ if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
+ (socklen_t)sizeof(on)) < 0) {
+#ifndef USE_WINSOCK
+ log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
+ strerror(errno));
+ close(s);
+#else
+ log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+#endif
+ return -1;
+ }
+#endif /* SO_REUSEADDR */
+#ifdef SO_REUSEPORT
+ /* try to set SO_REUSEPORT so that incoming
+ * connections are distributed evenly among the receiving threads.
+ * Each thread must have its own socket bound to the same port,
+ * with SO_REUSEPORT set on each socket.
+ */
+ if (reuseport && *reuseport &&
+ setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
+ (socklen_t)sizeof(on)) < 0) {
+#ifdef ENOPROTOOPT
+ if(errno != ENOPROTOOPT || verbosity >= 3)
+ log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
+ strerror(errno));
+#endif
+ /* this option is not essential, we can continue */
+ *reuseport = 0;
+ }
+#else
+ (void)reuseport;
+#endif /* defined(SO_REUSEPORT) */
+#if defined(IPV6_V6ONLY)
+ /* any nonzero v6only switches the option on; the value 2 is not
+ * treated specially here */
+ if(addr->ai_family == AF_INET6 && v6only) {
+ if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
+ (void*)&on, (socklen_t)sizeof(on)) < 0) {
+#ifndef USE_WINSOCK
+ log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
+ strerror(errno));
+ close(s);
+#else
+ log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+#endif
+ return -1;
+ }
+ }
+#else
+ (void)v6only;
+#endif /* IPV6_V6ONLY */
+ if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
+#ifndef USE_WINSOCK
+ /* detect freebsd jail with no ipv6 permission */
+ if(addr->ai_family==AF_INET6 && errno==EINVAL)
+ *noproto = 1;
+ else {
+ log_err_addr("can't bind socket", strerror(errno),
+ (struct sockaddr_storage*)addr->ai_addr,
+ addr->ai_addrlen);
+ }
+ close(s);
+#else
+ log_err_addr("can't bind socket",
+ wsa_strerror(WSAGetLastError()),
+ (struct sockaddr_storage*)addr->ai_addr,
+ addr->ai_addrlen);
+ closesocket(s);
+#endif
+ return -1;
+ }
+ if(!fd_set_nonblock(s)) {
+#ifndef USE_WINSOCK
+ close(s);
+#else
+ closesocket(s);
+#endif
+ return -1;
+ }
+ if(listen(s, TCP_BACKLOG) == -1) {
+#ifndef USE_WINSOCK
+ log_err("can't listen: %s", strerror(errno));
+ close(s);
+#else
+ log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+#endif
+ return -1;
+ }
+ return s;
+}
+
+/**
+ * Resolve ifname/port with getaddrinfo and create a bound socket from
+ * the first result: a UDP socket for SOCK_DGRAM, a listening TCP socket
+ * otherwise.
+ * @param noip6: cleared on entry; set to 1 when the failure was caused
+ *	by missing IPv6 support and hints asked for AF_INET6.
+ * @return the socket, or -1 on error.
+ */
+static int
+make_sock(int stype, const char* ifname, const char* port,
+	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
+	int* reuseport)
+{
+	struct addrinfo *ai = NULL;
+	const char* syserr = "";
+	int gai, fd, inuse, noproto;
+
+	hints->ai_socktype = stype;
+	*noip6 = 0;
+	gai = getaddrinfo(ifname, port, hints, &ai);
+	if(gai != 0 || !ai) {
+#ifdef USE_WINSOCK
+		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
+			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
+			return -1;
+		}
+#endif
+#ifdef EAI_SYSTEM
+		/* the system error text is only meaningful for EAI_SYSTEM */
+		if(gai == EAI_SYSTEM)
+			syserr = strerror(errno);
+#endif
+		log_err("node %s:%s getaddrinfo: %s %s",
+			ifname?ifname:"default", port, gai_strerror(gai),
+			syserr);
+		return -1;
+	}
+
+	if(stype == SOCK_DGRAM) {
+		verbose_print_addr(ai);
+		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
+			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
+			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
+			reuseport);
+		if(fd == -1) {
+			if(inuse)
+				log_err("bind: address already in use");
+			else if(noproto && hints->ai_family == AF_INET6)
+				*noip6 = 1;
+		}
+	} else {
+		fd = create_tcp_accept_sock(ai, v6only, &noproto, reuseport);
+		if(fd == -1 && noproto && hints->ai_family == AF_INET6)
+			*noip6 = 1;
+	}
+	freeaddrinfo(ai);
+	return fd;
+}
+
+/**
+ * Make a socket; an interface written as "ifspec@port" overrides the
+ * port argument with the part after the '@'.
+ */
+static int
+make_sock_port(int stype, const char* ifname, const char* port,
+	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
+	int* reuseport)
+{
+	char portpart[16];
+	char ifpart[128];
+	size_t iflen, portlen;
+	char* at = strchr(ifname, '@');
+
+	/* no override present, use the arguments as they are */
+	if(!at)
+		return make_sock(stype, ifname, port, hints, v6only, noip6,
+			rcv, snd, reuseport);
+
+	iflen = (size_t)(at - ifname);
+	portlen = strlen(at+1);
+	if(iflen >= sizeof(ifpart)) {
+		log_err("ifname too long: %s", ifname);
+		*noip6 = 0;
+		return -1;
+	}
+	if(portlen >= sizeof(portpart)) {
+		log_err("portnumber too long: %s", ifname);
+		*noip6 = 0;
+		return -1;
+	}
+	/* split "ifspec@port" into its two halves */
+	(void)strlcpy(ifpart, ifname, sizeof(ifpart));
+	ifpart[iflen] = 0;
+	(void)strlcpy(portpart, at+1, sizeof(portpart));
+	portpart[portlen] = 0;
+	return make_sock(stype, ifpart, portpart, hints, v6only, noip6,
+		rcv, snd, reuseport);
+}
+
+/**
+ * Push a new entry onto the front of the open ports list.
+ * @param list: list head. changed.
+ * @param s: fd.
+ * @param ftype: kind of listening socket the fd is.
+ * @return false on failure (malloc). The list is unchanged then.
+ */
+static int
+port_insert(struct listen_port** list, int s, enum listen_type ftype)
+{
+	struct listen_port* node = (struct listen_port*)malloc(
+		sizeof(*node));
+	if(node == NULL)
+		return 0;
+	node->fd = s;
+	node->ftype = ftype;
+	node->next = *list;
+	*list = node;
+	return 1;
+}
+
+/**
+ * set fd to receive source address packet info
+ * Which socket option is used depends on the macros the platform
+ * defines; if none of the supported options exists for the family an
+ * error is logged.
+ * @param s: the socket.
+ * @param family: AF_INET6 or AF_INET; for any other value nothing is
+ *	done and success is returned.
+ * @return 1 on success, 0 on failure (error is logged).
+ */
+static int
+set_recvpktinfo(int s, int family)
+{
+#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
+ int on = 1;
+#else
+ (void)s;
+#endif
+ if(family == AF_INET6) {
+ /* IPV6_RECVPKTINFO is preferred over IPV6_PKTINFO when both exist */
+# ifdef IPV6_RECVPKTINFO
+ if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
+ (void*)&on, (socklen_t)sizeof(on)) < 0) {
+ log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
+ strerror(errno));
+ return 0;
+ }
+# elif defined(IPV6_PKTINFO)
+ if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
+ (void*)&on, (socklen_t)sizeof(on)) < 0) {
+ log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
+ strerror(errno));
+ return 0;
+ }
+# else
+ log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
+ "disable interface-automatic in config");
+ return 0;
+# endif /* defined IPV6_RECVPKTINFO */
+
+ } else if(family == AF_INET) {
+ /* IP_PKTINFO is preferred; IP_RECVDSTADDR is only used when
+ * IP_SENDSRCADDR is available as well */
+# ifdef IP_PKTINFO
+ if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
+ (void*)&on, (socklen_t)sizeof(on)) < 0) {
+ log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
+ strerror(errno));
+ return 0;
+ }
+# elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
+ if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
+ (void*)&on, (socklen_t)sizeof(on)) < 0) {
+ log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
+ strerror(errno));
+ return 0;
+ }
+# else
+ log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
+ "interface-automatic in config");
+ return 0;
+# endif /* IP_PKTINFO */
+
+ }
+ return 1;
+}
+
+/**
+ * Helper for ports_open. Opens the UDP and/or TCP socket for one
+ * interface and adds them to the list of open ports.
+ * @param ifname: The interface ip address, optionally as "ip@port".
+ * @param do_auto: use automatic interface detection.
+ * 	If enabled, then ifname must be the wildcard name.
+ * @param do_udp: if udp should be used.
+ * @param do_tcp: if tcp should be used.
+ * @param hints: for getaddrinfo. family and flags have to be set by caller.
+ * @param port: Port number to use (as string).
+ * @param list: list of open ports, appended to, changed to point to list head.
+ * @param rcv: receive buffer size for UDP
+ * @param snd: send buffer size for UDP
+ * @param ssl_port: ssl service port number
+ * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
+ * 	set to false on exit if reuseport failed due to no kernel support.
+ * @return: returns false on error. Missing IPv6 support is not an error.
+ */
+static int
+ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
+	struct addrinfo *hints, const char* port, struct listen_port** list,
+	size_t rcv, size_t snd, int ssl_port, int* reuseport)
+{
+	int fd, noip6 = 0;
+	if(!do_udp && !do_tcp)
+		return 0;
+
+	/* datagram socket: ancillary-data flavour for interface-automatic,
+	 * regular udp otherwise */
+	if(do_auto || do_udp) {
+		fd = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
+			&noip6, rcv, snd, reuseport);
+		if(fd == -1) {
+			if(!noip6)
+				return 0;
+			log_warn("IPv6 protocol not available");
+			return 1;
+		}
+		/* getting source addr packet info is highly non-portable */
+		if(do_auto && !set_recvpktinfo(fd, hints->ai_family)) {
+#ifndef USE_WINSOCK
+			close(fd);
+#else
+			closesocket(fd);
+#endif
+			return 0;
+		}
+		if(!port_insert(list, fd, do_auto ? listen_type_udpancil :
+			listen_type_udp)) {
+#ifndef USE_WINSOCK
+			close(fd);
+#else
+			closesocket(fd);
+#endif
+			return 0;
+		}
+	}
+
+	if(do_tcp) {
+		/* the effective port is the "@port" suffix when present */
+		const char* at = strchr(ifname, '@');
+		int is_ssl = (atoi(at ? at+1 : port) == ssl_port);
+		fd = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
+			&noip6, 0, 0, reuseport);
+		if(fd == -1) {
+			/* missing IPv6 is silently accepted for tcp */
+			return noip6 ? 1 : 0;
+		}
+		if(is_ssl)
+			verbose(VERB_ALGO, "setup TCP for SSL service");
+		if(!port_insert(list, fd, is_ssl ? listen_type_ssl :
+			listen_type_tcp)) {
+#ifndef USE_WINSOCK
+			close(fd);
+#else
+			closesocket(fd);
+#endif
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/**
+ * Put a commpoint at the front of the listen structure's commpoint list.
+ * @param c: commpoint to add.
+ * @param front: listen struct.
+ * @return: false on failure (malloc).
+ */
+static int
+listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
+{
+	struct listen_list* node = (struct listen_list*)malloc(
+		sizeof(*node));
+	if(node == NULL)
+		return 0;
+	node->next = front->cps;
+	node->com = c;
+	front->cps = node;
+	return 1;
+}
+
+/*
+ * Build the listening structure for one thread: allocate the shared UDP
+ * buffer and create one comm point for every entry of the ports list.
+ * Returns the malloced structure, or NULL on any failure (allocation,
+ * comm point creation, or an empty ports list); everything created so
+ * far is released through listen_delete() in that case.
+ */
+struct listen_dnsport*
+listen_create(struct comm_base* base, struct listen_port* ports,
+	size_t bufsize, int tcp_accept_count, void* sslctx,
+	struct dt_env* dtenv, comm_point_callback_t* cb, void *cb_arg)
+{
+	struct listen_dnsport* front = (struct listen_dnsport*)
+		malloc(sizeof(struct listen_dnsport));
+	if(!front)
+		return NULL;
+	/* initialise every member, malloc does not zero the struct */
+	front->base = base;
+	front->cps = NULL;
+	front->udp_buff = sldns_buffer_new(bufsize);
+	if(!front->udp_buff) {
+		free(front);
+		return NULL;
+	}
+
+	/* create comm points as needed */
+	while(ports) {
+		struct comm_point* cp = NULL;
+		if(ports->ftype == listen_type_udp)
+			cp = comm_point_create_udp(base, ports->fd,
+				front->udp_buff, cb, cb_arg);
+		else if(ports->ftype == listen_type_tcp)
+			cp = comm_point_create_tcp(base, ports->fd,
+				tcp_accept_count, bufsize, cb, cb_arg);
+		else if(ports->ftype == listen_type_ssl) {
+			cp = comm_point_create_tcp(base, ports->fd,
+				tcp_accept_count, bufsize, cb, cb_arg);
+			/* creation may have failed; the NULL case is
+			 * reported by the common check below */
+			if(cp)
+				cp->ssl = sslctx;
+		} else if(ports->ftype == listen_type_udpancil)
+			cp = comm_point_create_udp_ancil(base, ports->fd,
+				front->udp_buff, cb, cb_arg);
+		if(!cp) {
+			log_err("can't create commpoint");
+			listen_delete(front);
+			return NULL;
+		}
+		cp->dtenv = dtenv;
+		/* NOTE(review): presumably keeps comm_point_delete from
+		 * closing the shared fd, which listening_ports_free()
+		 * closes - confirm in util/netevent */
+		cp->do_not_close = 1;
+		if(!listen_cp_insert(cp, front)) {
+			log_err("malloc failed");
+			/* cp is not in the list yet, delete it by hand */
+			comm_point_delete(cp);
+			listen_delete(front);
+			return NULL;
+		}
+		ports = ports->next;
+	}
+	if(!front->cps) {
+		log_err("Could not open sockets to accept queries.");
+		listen_delete(front);
+		return NULL;
+	}
+
+	return front;
+}
+
+/* delete every commpoint in the list, and the list nodes themselves */
+void
+listen_list_delete(struct listen_list* list)
+{
+	struct listen_list* next;
+	for(; list; list = next) {
+		/* remember the successor before the node is freed */
+		next = list->next;
+		comm_point_delete(list->com);
+		free(list);
+	}
+}
+
+/* release the commpoints, the shared udp buffer and the structure;
+ * a NULL argument is accepted and ignored */
+void
+listen_delete(struct listen_dnsport* front)
+{
+	if(front) {
+		listen_list_delete(front->cps);
+		sldns_buffer_free(front->udp_buff);
+		free(front);
+	}
+}
+
+/*
+ * Open the shared listening sockets described by the config: either the
+ * default/wildcard addresses (no interfaces configured, or
+ * interface-automatic with udp) or every configured interface.
+ * Returns the list of opened ports; NULL on error or when both IPv4 and
+ * IPv6 are disabled.
+ */
+struct listen_port*
+listening_ports_open(struct config_file* cfg, int* reuseport)
+{
+	struct listen_port* opened = NULL;
+	struct addrinfo hints;
+	char portstr[32];
+	int want4 = cfg->do_ip4;
+	int want6 = cfg->do_ip6;
+	/* tcp is off when no incoming tcp buffers are configured */
+	int want_tcp = (cfg->incoming_num_tcp == 0) ? 0 : cfg->do_tcp;
+	int automatic = cfg->if_automatic && cfg->do_udp;
+	int idx;
+#ifndef INET6
+	want6 = 0;
+#endif
+	snprintf(portstr, sizeof(portstr), "%d", cfg->port);
+
+	/* getaddrinfo hints; no name lookups on our listening ports */
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_flags = (cfg->num_ifs > 0) ?
+		(AI_PASSIVE | AI_NUMERICHOST) : AI_PASSIVE;
+	if(!want4 && !want6)
+		return NULL;
+
+	if(!automatic && cfg->num_ifs != 0) {
+		/* explicitly configured interfaces */
+		for(idx = 0; idx < cfg->num_ifs; idx++) {
+			int is6 = str_is_ip6(cfg->ifs[idx]);
+			if(is6 ? !want6 : !want4)
+				continue;
+			hints.ai_family = is6 ? AF_INET6 : AF_INET;
+			if(!ports_create_if(cfg->ifs[idx], 0, cfg->do_udp,
+				want_tcp, &hints, portstr, &opened,
+				cfg->so_rcvbuf, cfg->so_sndbuf,
+				cfg->ssl_port, reuseport)) {
+				listening_ports_free(opened);
+				return NULL;
+			}
+		}
+		return opened;
+	}
+
+	/* create ip4 and ip6 ports so that return addresses are nice. */
+	if(want6) {
+		hints.ai_family = AF_INET6;
+		if(!ports_create_if(automatic ? "::0" : "::1",
+			automatic, cfg->do_udp, want_tcp,
+			&hints, portstr, &opened,
+			cfg->so_rcvbuf, cfg->so_sndbuf,
+			cfg->ssl_port, reuseport)) {
+			listening_ports_free(opened);
+			return NULL;
+		}
+	}
+	if(want4) {
+		hints.ai_family = AF_INET;
+		if(!ports_create_if(automatic ? "0.0.0.0" : "127.0.0.1",
+			automatic, cfg->do_udp, want_tcp,
+			&hints, portstr, &opened,
+			cfg->so_rcvbuf, cfg->so_sndbuf,
+			cfg->ssl_port, reuseport)) {
+			listening_ports_free(opened);
+			return NULL;
+		}
+	}
+	return opened;
+}
+
+/* close the descriptors (those that are not -1) and free the list */
+void listening_ports_free(struct listen_port* list)
+{
+	struct listen_port* next;
+	for(; list; list = next) {
+		next = list->next;
+		if(list->fd != -1) {
+#ifndef USE_WINSOCK
+			close(list->fd);
+#else
+			closesocket(list->fd);
+#endif
+		}
+		free(list);
+	}
+}
+
+/* sum the sizes of the structure, the udp buffer and all commpoints */
+size_t listen_get_mem(struct listen_dnsport* listen)
+{
+	struct listen_list* item = listen->cps;
+	size_t total = sizeof(*listen) + sizeof(*listen->base) +
+		sizeof(*listen->udp_buff);
+	total += sldns_buffer_capacity(listen->udp_buff);
+	while(item) {
+		total += sizeof(*item) + comm_point_get_mem(item->com);
+		item = item->next;
+	}
+	return total;
+}
+
+/* stop listening on the tcp accept commpoints that still have free
+ * tcp handlers; the others have already stopped listening */
+void listen_stop_accept(struct listen_dnsport* listen)
+{
+	struct listen_list* item;
+	for(item = listen->cps; item; item = item->next) {
+		if(item->com->type != comm_tcp_accept ||
+			item->com->tcp_free == NULL)
+			continue;
+		comm_point_stop_listening(item->com);
+	}
+}
+
+/* resume listening on the tcp accept commpoints that have free tcp
+ * handlers; listening on the others is of no use */
+void listen_start_accept(struct listen_dnsport* listen)
+{
+	struct listen_list* item;
+	for(item = listen->cps; item; item = item->next) {
+		if(item->com->type != comm_tcp_accept ||
+			item->com->tcp_free == NULL)
+			continue;
+		comm_point_start_listening(item->com, -1, -1);
+	}
+}
+
diff --git a/external/unbound/services/listen_dnsport.h b/external/unbound/services/listen_dnsport.h
new file mode 100644
index 000000000..075f6d281
--- /dev/null
+++ b/external/unbound/services/listen_dnsport.h
@@ -0,0 +1,210 @@
+/*
+ * services/listen_dnsport.h - listen on port 53 for incoming DNS queries.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file has functions to get queries from clients.
+ */
+
+#ifndef LISTEN_DNSPORT_H
+#define LISTEN_DNSPORT_H
+
+#include "util/netevent.h"
+struct listen_list;
+struct config_file;
+struct addrinfo;
+struct sldns_buffer;
+
+/**
+ * Listening for queries structure.
+ * Contains list of query-listen sockets.
+ * Created by listen_create() and released by listen_delete().
+ */
+struct listen_dnsport {
+ /** Base for select calls */
+ struct comm_base* base;
+
+ /** buffer shared by UDP connections, since there is only one
+ datagram at any time. Allocated in listen_create() with the
+ requested bufsize and freed in listen_delete(). */
+ struct sldns_buffer* udp_buff;
+
+ /** list of comm points used to get incoming events */
+ struct listen_list* cps;
+};
+
+/**
+ * Single linked list to store event points.
+ * listen_list_delete() frees the nodes and calls comm_point_delete()
+ * on every stored commpoint.
+ */
+struct listen_list {
+ /** next in list */
+ struct listen_list* next;
+ /** event info */
+ struct comm_point* com;
+};
+
+/**
+ * type of ports
+ * listen_create() uses this value to pick the kind of commpoint that is
+ * created for the file descriptor.
+ */
+enum listen_type {
+ /** udp type */
+ listen_type_udp,
+ /** tcp type */
+ listen_type_tcp,
+ /** udp ipv6 (v4mapped) for use with ancillary data; given to the
+ * UDP sockets that are opened for interface-automatic */
+ listen_type_udpancil,
+ /** ssl over tcp type; a tcp accept commpoint that gets the ssl
+ * context attached */
+ listen_type_ssl
+};
+
+/**
+ * Single linked list to store shared ports that have been
+ * opened for use by all threads.
+ * Built by listening_ports_open() and released, including closing the
+ * descriptors, by listening_ports_free().
+ */
+struct listen_port {
+ /** next in list */
+ struct listen_port* next;
+ /** file descriptor, open and ready for use */
+ int fd;
+ /** type of file descriptor, one of enum listen_type
+ * (udp, tcp, udp with ancillary data, or ssl) */
+ enum listen_type ftype;
+};
+
+/**
+ * Create shared listening ports
+ * Getaddrinfo, create socket, bind and listen to zero or more
+ * interfaces for IP4 and/or IP6, for UDP and/or TCP.
+ * On the given port number. It creates the sockets.
+ * @param cfg: settings on what ports to open.
+ * @param reuseport: set to true if you want reuseport, or NULL to not have it,
+ * set to false on exit if reuseport failed to apply (because of no
+ * kernel support).
+ * @return: linked list of ports or NULL on error.
+ */
+struct listen_port* listening_ports_open(struct config_file* cfg,
+ int* reuseport);
+
+/**
+ * Close and delete the (list of) listening ports.
+ */
+void listening_ports_free(struct listen_port* list);
+
+/**
+ * Create commpoints for this thread for the shared ports.
+ * @param base: the comm_base that provides event functionality.
+ * for default all ifs.
+ * @param ports: the list of shared ports.
+ * @param bufsize: size of datagram buffer.
+ * @param tcp_accept_count: max number of simultaneous TCP connections
+ * from clients.
+ * @param sslctx: nonNULL if ssl context.
+ * @param dtenv: nonNULL if dnstap enabled.
+ * @param cb: callback function when a request arrives. It is passed
+ * the packet and user argument. Return true to send a reply.
+ * @param cb_arg: user data argument for callback function.
+ * @return: the malloced listening structure, ready for use. NULL on error.
+ */
+struct listen_dnsport* listen_create(struct comm_base* base,
+ struct listen_port* ports, size_t bufsize, int tcp_accept_count,
+ void* sslctx, struct dt_env *dtenv, comm_point_callback_t* cb,
+ void* cb_arg);
+
+/**
+ * delete the listening structure
+ * @param listen: listening structure.
+ */
+void listen_delete(struct listen_dnsport* listen);
+
+/**
+ * delete listen_list of commpoints. Calls commpointdelete() on items.
+ * This may close the fds or not depending on flags.
+ * @param list: to delete.
+ */
+void listen_list_delete(struct listen_list* list);
+
+/**
+ * get memory size used by the listening structs
+ * @param listen: listening structure.
+ * @return: size in bytes.
+ */
+size_t listen_get_mem(struct listen_dnsport* listen);
+
+/**
+ * stop accept handlers for TCP (until enabled again)
+ * @param listen: listening structure.
+ */
+void listen_stop_accept(struct listen_dnsport* listen);
+
+/**
+ * start accept handlers for TCP (was stopped before)
+ * @param listen: listening structure.
+ */
+void listen_start_accept(struct listen_dnsport* listen);
+
+/**
+ * Create and bind nonblocking UDP socket
+ * @param family: for socket call.
+ * @param socktype: for socket call.
+ * @param addr: for bind call.
+ * @param addrlen: for bind call.
+ * @param v6only: if enabled, IP6 sockets get IP6ONLY option set.
+ * if enabled with value 2 IP6ONLY option is disabled.
+ * @param inuse: on error, this is set true if the port was in use.
+ * @param noproto: on error, this is set true if cause is that the
+ IPv6 proto (family) is not available.
+ * @param rcv: set size on rcvbuf with socket option, if 0 it is not set.
+ * @param snd: set size on sndbuf with socket option, if 0 it is not set.
+ * @param listen: if true, this is a listening UDP port, eg port 53, and
+ * set SO_REUSEADDR on it.
+ * @param reuseport: if nonNULL and true, try to set SO_REUSEPORT on
+ * listening UDP port. Set to false on return if it failed to do so.
+ * @return: the socket. -1 on error.
+ */
+int create_udp_sock(int family, int socktype, struct sockaddr* addr,
+ socklen_t addrlen, int v6only, int* inuse, int* noproto, int rcv,
+ int snd, int listen, int* reuseport);
+
+/**
+ * Create and bind TCP listening socket
+ * @param addr: address info ready to make socket.
+ * @param v6only: enable ip6 only flag on ip6 sockets.
+ * @param noproto: if error caused by lack of protocol support.
+ * @param reuseport: if nonNULL and true, try to set SO_REUSEPORT on
+ * listening TCP port. Set to false on return if it failed to do so.
+ * @return: the socket. -1 on error.
+ */
+int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
+ int* reuseport);
+
+#endif /* LISTEN_DNSPORT_H */
diff --git a/external/unbound/services/localzone.c b/external/unbound/services/localzone.c
new file mode 100644
index 000000000..d285a127c
--- /dev/null
+++ b/external/unbound/services/localzone.c
@@ -0,0 +1,1400 @@
+/*
+ * services/localzone.c - local zones authority service.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains functions to enable local zone authority service.
+ */
+#include "config.h"
+#include "services/localzone.h"
+#include "ldns/str2wire.h"
+#include "ldns/sbuffer.h"
+#include "util/regional.h"
+#include "util/config_file.h"
+#include "util/data/dname.h"
+#include "util/data/packed_rrset.h"
+#include "util/data/msgencode.h"
+#include "util/net_help.h"
+#include "util/data/msgreply.h"
+#include "util/data/msgparse.h"
+
+/** allocate an empty local zones store; returns NULL on malloc failure */
+struct local_zones*
+local_zones_create(void)
+{
+	struct local_zones* zs;
+
+	/* zero filled, so all members start out cleared */
+	zs = (struct local_zones*)calloc(1, sizeof(struct local_zones));
+	if(zs == NULL)
+		return NULL;
+	rbtree_init(&zs->ztree, &local_zone_cmp);
+	lock_rw_init(&zs->lock);
+	/* the lock covers the zone tree; it also protects the rbnodes
+	 * that are embedded in each struct local_zone */
+	lock_protect(&zs->lock, &zs->ztree, sizeof(zs->ztree));
+	return zs;
+}
+
+/** tree traversal callback: delete the zone that is the key of the node */
+static void
+lzdel(rbnode_t* n, void* ATTR_UNUSED(arg))
+{
+	local_zone_delete((struct local_zone*)n->key);
+}
+
+/** delete the zones store and every zone in it; NULL is accepted */
+void
+local_zones_delete(struct local_zones* zones)
+{
+	if(zones != NULL) {
+		lock_rw_destroy(&zones->lock);
+		/* visit every node of the tree and delete its zone */
+		traverse_postorder(&zones->ztree, lzdel, NULL);
+		free(zones);
+	}
+}
+
+/** delete one zone: its lock, its region, its name; NULL is accepted */
+void
+local_zone_delete(struct local_zone* z)
+{
+	if(z != NULL) {
+		lock_rw_destroy(&z->lock);
+		/* the zone contents were allocated from the region */
+		regional_destroy(z->region);
+		free(z->name);
+		free(z);
+	}
+}
+
+/** compare two local zones: by class first, then by name */
+int
+local_zone_cmp(const void* z1, const void* z2)
+{
+	struct local_zone* lhs = (struct local_zone*)z1;
+	struct local_zone* rhs = (struct local_zone*)z2;
+	int matched; /* output of the name compare; not used here */
+
+	/* class is the primary key, so that the name hierarchy can be
+	 * maintained within one class */
+	if(lhs->dclass < rhs->dclass)
+		return -1;
+	if(lhs->dclass > rhs->dclass)
+		return 1;
+	return dname_lab_cmp(lhs->name, lhs->namelabs, rhs->name,
+		rhs->namelabs, &matched);
+}
+
+/** compare two local data nodes by name, in canonical label order */
+int
+local_data_cmp(const void* d1, const void* d2)
+{
+	struct local_data* lhs = (struct local_data*)d1;
+	struct local_data* rhs = (struct local_data*)d2;
+	int matched; /* output of the name compare; not used here */
+
+	return dname_canon_lab_cmp(lhs->name, lhs->namelabs, rhs->name,
+		rhs->namelabs, &matched);
+}
+
+/* Convert a text format domain name to wireformat.
+ * On success returns 1; *res is the converted name (released with free()
+ * by the callers in this file), *len is set by the conversion routines and
+ * *labs is the label count. On failure returns 0 with *res NULL, *labs 0. */
+int
+parse_dname(const char* str, uint8_t** res, size_t* len, int* labs)
+{
+	uint8_t* wire = sldns_str2wire_dname(str, len);
+
+	*res = wire;
+	if(wire == NULL) {
+		*labs = 0;
+		log_err("cannot parse name %s", str);
+		return 0;
+	}
+	*labs = dname_count_size_labels(wire, len);
+	return 1;
+}
+
+/**
+ * Create a new localzone structure; it is not entered in any tree.
+ * @param nm: zone name. The pointer is stored in the zone (no copy is made).
+ *	On failure nm is not freed here; it stays with the caller.
+ * @param len: length of nm.
+ * @param labs: number of labels in nm.
+ * @param t: zone type.
+ * @param dclass: zone class.
+ * @return new zone, with its lock initialised and an empty region and
+ *	data tree, or NULL on allocation failure.
+ */
+static struct local_zone*
+local_zone_create(uint8_t* nm, size_t len, int labs,
+	enum localzone_type t, uint16_t dclass)
+{
+	struct local_zone* z = (struct local_zone*)calloc(1, sizeof(*z));
+	if(!z) {
+		return NULL;
+	}
+	z->node.key = z;
+	z->dclass = dclass;
+	z->type = t;
+	z->name = nm;
+	z->namelen = len;
+	z->namelabs = labs;
+	lock_rw_init(&z->lock);
+	z->region = regional_create();
+	if(!z->region) {
+		/* undo the lock init, so the lock is not leaked */
+		lock_rw_destroy(&z->lock);
+		free(z);
+		return NULL;
+	}
+	rbtree_init(&z->data, &local_data_cmp);
+	lock_protect(&z->lock, &z->parent, sizeof(*z)-sizeof(rbnode_t));
+	/* also the zones->lock protects node, parent, name*, class */
+	return z;
+}
+
+/**
+ * Enter a new zone with an allocated dname into the zones tree.
+ * @param zones: the zones tree, it is write locked during the insert.
+ * @param nm: allocated zone name. Ownership passes to this function on
+ *	every path: on success the zone holds it, on failure it is freed.
+ * @param len: length of nm.
+ * @param labs: number of labels in nm.
+ * @param t: zone type.
+ * @param c: zone class.
+ * @return the new zone, returned with its lock held for writing (the
+ *	caller must unlock it), or NULL on out of memory or a duplicate zone.
+ */
+static struct local_zone*
+lz_enter_zone_dname(struct local_zones* zones, uint8_t* nm, size_t len,
+	int labs, enum localzone_type t, uint16_t c)
+{
+	struct local_zone* z = local_zone_create(nm, len, labs, t, c);
+	if(!z) {
+		/* no zone took the name; release it here, because the
+		 * callers do not free it once it has been passed in */
+		free(nm);
+		log_err("out of memory");
+		return NULL;
+	}
+
+	/* add to rbtree */
+	lock_rw_wrlock(&zones->lock);
+	lock_rw_wrlock(&z->lock);
+	if(!rbtree_insert(&zones->ztree, &z->node)) {
+		log_warn("duplicate local-zone");
+		lock_rw_unlock(&z->lock);
+		/* this also frees nm, it is z->name */
+		local_zone_delete(z);
+		lock_rw_unlock(&zones->lock);
+		return NULL;
+	}
+	lock_rw_unlock(&zones->lock);
+	return z;
+}
+
+/**
+ * Enter a new zone, given its name and its type as text.
+ * The zone comes back from lz_enter_zone_dname, which returns it write
+ * locked; the callers in this file unlock it when done.
+ * @return the new zone or NULL on failure (an error is logged).
+ */
+static struct local_zone*
+lz_enter_zone(struct local_zones* zones, const char* name, const char* type,
+	uint16_t dclass)
+{
+	enum localzone_type ztype;
+	struct local_zone* zone;
+	uint8_t* wirename;
+	size_t wirelen;
+	int nlabs;
+
+	if(!parse_dname(name, &wirename, &wirelen, &nlabs)) {
+		log_err("bad zone name %s %s", name, type);
+		return NULL;
+	}
+	if(!local_zone_str2type(type, &ztype)) {
+		log_err("bad lz_enter_zone type %s %s", name, type);
+		free(wirename);
+		return NULL;
+	}
+	zone = lz_enter_zone_dname(zones, wirename, wirelen, nlabs, ztype,
+		dclass);
+	if(zone == NULL)
+		log_err("could not enter zone %s %s", name, type);
+	return zone;
+}
+
+/**
+ * Parse an RR string; return its name, type, class, TTL and rdata.
+ * The wireformat is written into the caller supplied buffer rr (of size
+ * len). *nm is an allocated copy of the owner name, the callers free() it.
+ * *rdata is whatever sldns_wirerr_get_rdatawl returns for that buffer and
+ * *rdata_len is the rdata length plus 2.
+ * NOTE(review): the +2 presumably counts the length prefix that the
+ * "rdatawl" pointer starts at -- confirm against the sldns documentation.
+ * @return 0 on parse error or out of memory (logged), 1 on success.
+ */
+static int
+get_rr_content(const char* str, uint8_t** nm, uint16_t* type,
+	uint16_t* dclass, time_t* ttl, uint8_t* rr, size_t len,
+	uint8_t** rdata, size_t* rdata_len)
+{
+	size_t namelen = 0;
+	int status;
+
+	status = sldns_str2wire_rr_buf(str, rr, &len, &namelen, 3600,
+		NULL, 0, NULL, 0);
+	if(status != 0) {
+		log_err("error parsing local-data at %d: '%s': %s",
+			LDNS_WIREPARSE_OFFSET(status), str,
+			sldns_get_errorstr_parse(status));
+		return 0;
+	}
+	*nm = memdup(rr, namelen);
+	if(*nm == NULL) {
+		log_err("out of memory");
+		return 0;
+	}
+	/* pick the fixed fields and the rdata out of the wireformat */
+	*dclass = sldns_wirerr_get_class(rr, len, namelen);
+	*type = sldns_wirerr_get_type(rr, len, namelen);
+	*ttl = (time_t)sldns_wirerr_get_ttl(rr, len, namelen);
+	*rdata = sldns_wirerr_get_rdatawl(rr, len, namelen);
+	*rdata_len = sldns_wirerr_get_rdatalen(rr, len, namelen)+2;
+	return 1;
+}
+
+/**
+ * Parse an RR string; return only its name and class.
+ * *nm is an allocated copy of the owner name, the callers free() it.
+ * @return 0 on parse error or out of memory (logged), 1 on success.
+ */
+static int
+get_rr_nameclass(const char* str, uint8_t** nm, uint16_t* dclass)
+{
+	uint8_t wire[LDNS_RR_BUF_SIZE];
+	size_t wirelen = sizeof(wire);
+	size_t namelen = 0;
+	int status;
+
+	status = sldns_str2wire_rr_buf(str, wire, &wirelen, &namelen, 3600,
+		NULL, 0, NULL, 0);
+	if(status != 0) {
+		log_err("error parsing local-data at %d '%s': %s",
+			LDNS_WIREPARSE_OFFSET(status), str,
+			sldns_get_errorstr_parse(status));
+		return 0;
+	}
+	*nm = memdup(wire, namelen);
+	*dclass = sldns_wirerr_get_class(wire, wirelen, namelen);
+	if(*nm == NULL) {
+		log_err("out of memory");
+		return 0;
+	}
+	return 1;
+}
+
+/**
+ * Look up the rrset of a given type at a local data name.
+ * @param data: local data domain name structure.
+ * @param type: type to look for (host order); the rrset keys hold the
+ *	type in network order, so it is converted once before the walk.
+ * @return rrset pointer or NULL if not found.
+ */
+static struct local_rrset*
+local_data_find_type(struct local_data* data, uint16_t type)
+{
+	uint16_t wanted = htons(type);
+	struct local_rrset* cur = data->rrsets;
+
+	while(cur != NULL) {
+		if(cur->rrset->rk.type == wanted)
+			return cur;
+		cur = cur->next;
+	}
+	return NULL;
+}
+
+/** return 1 if an RR with exactly this rdata is already in the rrset */
+static int
+rr_is_duplicate(struct packed_rrset_data* pd, uint8_t* rdata, size_t rdata_len)
+{
+	size_t idx;
+
+	for(idx = 0; idx < pd->count; idx++) {
+		/* a different length can never be the same rdata */
+		if(pd->rr_len[idx] != rdata_len)
+			continue;
+		if(memcmp(pd->rr_data[idx], rdata, rdata_len) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * Create a new, empty local_rrset and put it first in the rrset list of
+ * the node.
+ * @param region: where the rrset, its key and its data are allocated.
+ * @param node: name node that gets the rrset; the rrset key refers to the
+ *	name stored in the node (no copy of the name is made).
+ * @param rrtype: RR type (host order).
+ * @param rrclass: RR class (host order).
+ * @return the new rrset, or NULL on out of memory. On failure the rrset
+ *	list of the node is left unchanged.
+ */
+static struct local_rrset*
+new_local_rrset(struct regional* region, struct local_data* node,
+	uint16_t rrtype, uint16_t rrclass)
+{
+	struct packed_rrset_data* pd;
+	struct ub_packed_rrset_key* key;
+	struct local_rrset* rrset = (struct local_rrset*)
+		regional_alloc_zero(region, sizeof(*rrset));
+	if(!rrset) {
+		log_err("out of memory");
+		return NULL;
+	}
+	key = (struct ub_packed_rrset_key*)
+		regional_alloc_zero(region, sizeof(*key));
+	if(!key) {
+		log_err("out of memory");
+		return NULL;
+	}
+	pd = (struct packed_rrset_data*)regional_alloc_zero(region,
+		sizeof(*pd));
+	if(!pd) {
+		log_err("out of memory");
+		return NULL;
+	}
+	pd->trust = rrset_trust_prim_noglue;
+	pd->security = sec_status_insecure;
+	key->entry.key = key;
+	key->entry.data = pd;
+	key->rk.dname = node->name;
+	key->rk.dname_len = node->namelen;
+	/* the key holds type and class in network order */
+	key->rk.type = htons(rrtype);
+	key->rk.rrset_class = htons(rrclass);
+	rrset->rrset = key;
+	/* Link into the list only now that the rrset is complete. If it
+	 * were linked before the allocations, an out of memory error would
+	 * leave a list entry without a key, and the list walk in
+	 * local_data_find_type dereferences the key of every entry. */
+	rrset->next = node->rrsets;
+	node->rrsets = rrset;
+	return rrset;
+}
+
+/**
+ * Insert an RR into the RRset data structure, as the first RR.
+ * New, one longer, arrays are allocated from the region and the old
+ * arrays are left behind in the region; this wastes a couple of bytes.
+ * @param region: where the arrays and the rdata copy are allocated.
+ * @param pd: rrset data to add the RR to.
+ * @param rdata: rdata of the RR; a copy of rdata_len bytes is stored.
+ * @param rdata_len: length of rdata.
+ * @param ttl: TTL of the RR.
+ * @return 1 on success, 0 on out of memory. On failure pd is unchanged.
+ */
+static int
+insert_rr(struct regional* region, struct packed_rrset_data* pd,
+	uint8_t* rdata, size_t rdata_len, time_t ttl)
+{
+	size_t newcount = pd->count + 1;
+	size_t* newlen;
+	time_t* newttl;
+	uint8_t** newdata;
+	uint8_t* copy;
+
+	/* Allocate everything before pd is touched, so that a failure
+	 * cannot leave pd with a count that does not match its arrays. */
+	newlen = regional_alloc(region, sizeof(*newlen)*newcount);
+	newttl = regional_alloc(region, sizeof(*newttl)*newcount);
+	newdata = regional_alloc(region, sizeof(*newdata)*newcount);
+	if(!newlen || !newttl || !newdata) {
+		log_err("out of memory");
+		return 0;
+	}
+	copy = regional_alloc_init(region, rdata, rdata_len);
+	if(!copy) {
+		log_err("out of memory");
+		return 0;
+	}
+	/* the existing RRs shift up by one, the new RR goes in slot 0 */
+	if(pd->count > 0) {
+		memcpy(newlen+1, pd->rr_len, sizeof(*newlen)*pd->count);
+		memcpy(newttl+1, pd->rr_ttl, sizeof(*newttl)*pd->count);
+		memcpy(newdata+1, pd->rr_data, sizeof(*newdata)*pd->count);
+	}
+	newlen[0] = rdata_len;
+	newttl[0] = ttl;
+	newdata[0] = copy;
+	/* commit */
+	pd->rr_len = newlen;
+	pd->rr_ttl = newttl;
+	pd->rr_data = newdata;
+	pd->count = newcount;
+	return 1;
+}
+
+/** find the data node with exactly this name in the zone, NULL if none */
+static struct local_data*
+lz_find_node(struct local_zone* z, uint8_t* nm, size_t nmlen, int nmlabs)
+{
+	/* search key on the stack; only the fields set here are filled in */
+	struct local_data lookup;
+
+	lookup.name = nm;
+	lookup.namelen = nmlen;
+	lookup.namelabs = nmlabs;
+	lookup.node.key = &lookup;
+	return (struct local_data*)rbtree_search(&z->data, &lookup.node);
+}
+
+/**
+ * Find the node for a name in the zone. If it does not exist it is
+ * created, together with all its empty nonterminal parents, up to the
+ * label count of the zone name.
+ * @param res: on success, the node for nm.
+ * @return 1 on success, 0 on out of memory.
+ */
+static int
+lz_find_create_node(struct local_zone* z, uint8_t* nm, size_t nmlen,
+	int nmlabs, struct local_data** res)
+{
+	struct local_data* found = lz_find_node(z, nm, nmlen, nmlabs);
+
+	if(found != NULL) {
+		*res = found;
+		return 1;
+	}
+
+	/* create a domain name to store rr. */
+	found = (struct local_data*)regional_alloc_zero(z->region,
+		sizeof(*found));
+	if(found == NULL) {
+		log_err("out of memory adding local data");
+		return 0;
+	}
+	found->node.key = found;
+	/* the node gets its own copy of the name, from the zone region */
+	found->name = regional_alloc_init(z->region, nm, nmlen);
+	if(found->name == NULL) {
+		log_err("out of memory");
+		return 0;
+	}
+	found->namelen = nmlen;
+	found->namelabs = nmlabs;
+	if(!rbtree_insert(&z->data, &found->node)) {
+		log_assert(0); /* duplicate name */
+	}
+	/* see if empty nonterminals need to be created; the recursive
+	 * call stores the parent in *res, it is overwritten below */
+	if(nmlabs > z->namelabs) {
+		dname_remove_label(&nm, &nmlen);
+		if(!lz_find_create_node(z, nm, nmlen, nmlabs-1, res))
+			return 0;
+	}
+	*res = found;
+	return 1;
+}
+
+/**
+ * Enter a data RR, given in text format, into a local zone.
+ * The RR is parsed, the name node is looked up or created in the zone, the
+ * rrset of that type is looked up or created at the node, and the RR is
+ * added to it unless an RR with the same rdata is already there.
+ * @param z: zone to add to. NOTE(review): the callers in this file hold
+ * z->lock for writing around this call -- confirm for other callers.
+ * @param rrstr: the RR in text format.
+ * @return 0 on failure (bad RR text, data not at the top of a redirect
+ * zone, or out of memory); 1 on success, also for an ignored duplicate.
+ */
+static int
+lz_enter_rr_into_zone(struct local_zone* z, const char* rrstr)
+{
+ uint8_t* nm;
+ size_t nmlen;
+ int nmlabs;
+ struct local_data* node;
+ struct local_rrset* rrset;
+ struct packed_rrset_data* pd;
+ uint16_t rrtype = 0, rrclass = 0;
+ time_t ttl = 0;
+ uint8_t rr[LDNS_RR_BUF_SIZE];
+ uint8_t* rdata;
+ size_t rdata_len;
+ /* on success nm is an allocated copy of the owner name; it is freed
+ * on every path below */
+ if(!get_rr_content(rrstr, &nm, &rrtype, &rrclass, &ttl, rr, sizeof(rr),
+ &rdata, &rdata_len)) {
+ log_err("bad local-data: %s", rrstr);
+ return 0;
+ }
+ log_assert(z->dclass == rrclass);
+ /* a redirect zone only accepts data at the zone name itself */
+ if(z->type == local_zone_redirect &&
+ query_dname_compare(z->name, nm) != 0) {
+ log_err("local-data in redirect zone must reside at top of zone"
+ ", not at %s", rrstr);
+ free(nm);
+ return 0;
+ }
+ nmlabs = dname_count_size_labels(nm, &nmlen);
+ if(!lz_find_create_node(z, nm, nmlen, nmlabs, &node)) {
+ free(nm);
+ return 0;
+ }
+ log_assert(node);
+ /* the node has its own copy of the name (lz_find_create_node makes
+ * one in the zone region), so nm is not needed any more */
+ free(nm);
+
+ rrset = local_data_find_type(node, rrtype);
+ if(!rrset) {
+ /* first RR of this type at this name */
+ rrset = new_local_rrset(z->region, node, rrtype, rrclass);
+ if(!rrset)
+ return 0;
+ /* at the zone apex: flag an NSEC rrset, remember the SOA rrset */
+ if(query_dname_compare(node->name, z->name) == 0) {
+ if(rrtype == LDNS_RR_TYPE_NSEC)
+ rrset->rrset->rk.flags = PACKED_RRSET_NSEC_AT_APEX;
+ if(rrtype == LDNS_RR_TYPE_SOA)
+ z->soa = rrset->rrset;
+ }
+ }
+ pd = (struct packed_rrset_data*)rrset->rrset->entry.data;
+ log_assert(rrset && pd);
+
+ /* check for duplicate RR */
+ if(rr_is_duplicate(pd, rdata, rdata_len)) {
+ verbose(VERB_ALGO, "ignoring duplicate RR: %s", rrstr);
+ return 1;
+ }
+ /* insert_rr stores a copy of the rdata in the zone region */
+ return insert_rr(z->region, pd, rdata, rdata_len, ttl);
+}
+
+/**
+ * Enter a data RR (text format) into the zone that covers its name.
+ * That zone must already exist; if no zone is found this is treated as an
+ * internal error and fatal_exit is called.
+ * @return result of entering the RR into the zone, 0 if rr is unparseable.
+ */
+static int
+lz_enter_rr_str(struct local_zones* zones, const char* rr)
+{
+	struct local_zone* zone;
+	uint8_t* owner;
+	uint16_t owner_class;
+	size_t owner_len;
+	int owner_labs;
+	int result;
+
+	if(!get_rr_nameclass(rr, &owner, &owner_class)) {
+		log_err("bad rr %s", rr);
+		return 0;
+	}
+	owner_labs = dname_count_size_labels(owner, &owner_len);
+
+	lock_rw_rdlock(&zones->lock);
+	zone = local_zones_lookup(zones, owner, owner_len, owner_labs,
+		owner_class);
+	if(zone == NULL) {
+		lock_rw_unlock(&zones->lock);
+		fatal_exit("internal error: no zone for rr %s", rr);
+	}
+	/* take the zone lock before the tree lock is released */
+	lock_rw_wrlock(&zone->lock);
+	lock_rw_unlock(&zones->lock);
+	free(owner);
+
+	result = lz_enter_rr_into_zone(zone, rr);
+	lock_rw_unlock(&zone->lock);
+	return result;
+}
+
+/** enter a zone (class IN) for every local-zone: statement in the config */
+static int
+lz_enter_zones(struct local_zones* zones, struct config_file* cfg)
+{
+	struct config_str2list* item = cfg->local_zones;
+
+	while(item != NULL) {
+		/* str is the zone name, str2 is the zone type */
+		struct local_zone* z = lz_enter_zone(zones, item->str,
+			item->str2, LDNS_RR_CLASS_IN);
+		if(z == NULL)
+			return 0;
+		/* the zone is handed back locked; release it */
+		lock_rw_unlock(&z->lock);
+		item = item->next;
+	}
+	return 1;
+}
+
+/**
+ * Check if a zone with exactly this name (class IN) is in the tree.
+ * SLOW, because the name is parsed from text for every call.
+ * @return 1 if the zone exists, 0 if not or if the name cannot be parsed.
+ */
+static int
+lz_exists(struct local_zones* zones, const char* name)
+{
+	struct local_zone lookup;
+	int present;
+
+	lookup.node.key = &lookup;
+	lookup.dclass = LDNS_RR_CLASS_IN;
+	if(!parse_dname(name, &lookup.name, &lookup.namelen,
+		&lookup.namelabs)) {
+		log_err("bad name %s", name);
+		return 0;
+	}
+	lock_rw_rdlock(&zones->lock);
+	present = (rbtree_search(&zones->ztree, &lookup.node) != NULL);
+	lock_rw_unlock(&zones->lock);
+	/* the parsed name was only needed for the search */
+	free(lookup.name);
+	return present;
+}
+
+/**
+ * Check if a zone name is in the nodefault list of the config.
+ * Names are compared case insensitively, and one trailing dot on either
+ * name is ignored.
+ * @return 1 if listed, 0 if not (also 0 for an empty name).
+ */
+static int
+lz_nodefault(struct config_file* cfg, const char* name)
+{
+	struct config_strlist* item;
+	size_t namelen = strlen(name);
+
+	if(namelen == 0)
+		return 0;
+	/* compare without the ending dot of name */
+	if(name[namelen-1] == '.')
+		namelen--;
+
+	for(item = cfg->local_zones_nodefault; item; item = item->next) {
+		size_t itemlen;
+		if(strncasecmp(item->str, name, namelen) != 0)
+			continue;
+		/* prefix matches; equal if the list entry ends here, or
+		 * has only an ending dot more */
+		itemlen = strlen(item->str);
+		if(itemlen == namelen)
+			return 1;
+		if(itemlen == namelen+1 && item->str[namelen] == '.')
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * Enter an AS112 default zone: a static zone with a SOA and an NS record.
+ * Nothing is entered if the zone already exists or is listed as nodefault.
+ * @return 1 on success (also if nothing was entered), 0 on failure.
+ */
+static int
+add_as112_default(struct local_zones* zones, struct config_file* cfg,
+	const char* name)
+{
+	char rrtext[1024]; /* known long enough */
+	struct local_zone* zone;
+	int ok;
+
+	/* do not enter default content */
+	if(lz_exists(zones, name))
+		return 1;
+	if(lz_nodefault(cfg, name))
+		return 1;
+
+	zone = lz_enter_zone(zones, name, "static", LDNS_RR_CLASS_IN);
+	if(zone == NULL)
+		return 0;
+	/* the zone came back locked; enter SOA, then NS, then unlock */
+	snprintf(rrtext, sizeof(rrtext), "%s 10800 IN SOA localhost. "
+		"nobody.invalid. 1 3600 1200 604800 10800", name);
+	ok = lz_enter_rr_into_zone(zone, rrtext);
+	if(ok) {
+		snprintf(rrtext, sizeof(rrtext),
+			"%s 10800 IN NS localhost. ", name);
+		ok = lz_enter_rr_into_zone(zone, rrtext);
+	}
+	lock_rw_unlock(&zone->lock);
+	return ok ? 1 : 0;
+}
+
+/**
+ * Enter the default zones; this list of zones is from RFC 6303.
+ * A default zone is skipped when the config has a zone with that name or
+ * lists it as nodefault. The localhost and loopback reverse zones are
+ * always considered; the LAN level zones only if unblock_lan_zones is off.
+ * @return 1 on success, 0 on failure (an error is logged).
+ */
+static int
+lz_enter_defaults(struct local_zones* zones, struct config_file* cfg)
+{
+	/* LAN level zones, entered in this order */
+	static const char* const as112_zones[] = {
+		"10.in-addr.arpa.",
+		"16.172.in-addr.arpa.", "17.172.in-addr.arpa.",
+		"18.172.in-addr.arpa.", "19.172.in-addr.arpa.",
+		"20.172.in-addr.arpa.", "21.172.in-addr.arpa.",
+		"22.172.in-addr.arpa.", "23.172.in-addr.arpa.",
+		"24.172.in-addr.arpa.", "25.172.in-addr.arpa.",
+		"26.172.in-addr.arpa.", "27.172.in-addr.arpa.",
+		"28.172.in-addr.arpa.", "29.172.in-addr.arpa.",
+		"30.172.in-addr.arpa.", "31.172.in-addr.arpa.",
+		"168.192.in-addr.arpa.",
+		"0.in-addr.arpa.",
+		"64.100.in-addr.arpa.", "65.100.in-addr.arpa.",
+		"66.100.in-addr.arpa.", "67.100.in-addr.arpa.",
+		"68.100.in-addr.arpa.", "69.100.in-addr.arpa.",
+		"70.100.in-addr.arpa.", "71.100.in-addr.arpa.",
+		"72.100.in-addr.arpa.", "73.100.in-addr.arpa.",
+		"74.100.in-addr.arpa.", "75.100.in-addr.arpa.",
+		"76.100.in-addr.arpa.", "77.100.in-addr.arpa.",
+		"78.100.in-addr.arpa.", "79.100.in-addr.arpa.",
+		"80.100.in-addr.arpa.", "81.100.in-addr.arpa.",
+		"82.100.in-addr.arpa.", "83.100.in-addr.arpa.",
+		"84.100.in-addr.arpa.", "85.100.in-addr.arpa.",
+		"86.100.in-addr.arpa.", "87.100.in-addr.arpa.",
+		"88.100.in-addr.arpa.", "89.100.in-addr.arpa.",
+		"90.100.in-addr.arpa.", "91.100.in-addr.arpa.",
+		"92.100.in-addr.arpa.", "93.100.in-addr.arpa.",
+		"94.100.in-addr.arpa.", "95.100.in-addr.arpa.",
+		"96.100.in-addr.arpa.", "97.100.in-addr.arpa.",
+		"98.100.in-addr.arpa.", "99.100.in-addr.arpa.",
+		"100.100.in-addr.arpa.", "101.100.in-addr.arpa.",
+		"102.100.in-addr.arpa.", "103.100.in-addr.arpa.",
+		"104.100.in-addr.arpa.", "105.100.in-addr.arpa.",
+		"106.100.in-addr.arpa.", "107.100.in-addr.arpa.",
+		"108.100.in-addr.arpa.", "109.100.in-addr.arpa.",
+		"110.100.in-addr.arpa.", "111.100.in-addr.arpa.",
+		"112.100.in-addr.arpa.", "113.100.in-addr.arpa.",
+		"114.100.in-addr.arpa.", "115.100.in-addr.arpa.",
+		"116.100.in-addr.arpa.", "117.100.in-addr.arpa.",
+		"118.100.in-addr.arpa.", "119.100.in-addr.arpa.",
+		"120.100.in-addr.arpa.", "121.100.in-addr.arpa.",
+		"122.100.in-addr.arpa.", "123.100.in-addr.arpa.",
+		"124.100.in-addr.arpa.", "125.100.in-addr.arpa.",
+		"126.100.in-addr.arpa.", "127.100.in-addr.arpa.",
+		"254.169.in-addr.arpa.",
+		"2.0.192.in-addr.arpa.",
+		"100.51.198.in-addr.arpa.",
+		"113.0.203.in-addr.arpa.",
+		"255.255.255.255.in-addr.arpa.",
+		"0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa.",
+		"d.f.ip6.arpa.",
+		"8.e.f.ip6.arpa.",
+		"9.e.f.ip6.arpa.",
+		"a.e.f.ip6.arpa.",
+		"b.e.f.ip6.arpa.",
+		"8.b.d.0.1.0.0.2.ip6.arpa."
+	};
+	struct local_zone* z;
+	size_t i;
+
+	/* block localhost level zones, first, later the LAN zones */
+
+	/* localhost. zone */
+	if(!lz_exists(zones, "localhost.") &&
+		!lz_nodefault(cfg, "localhost.")) {
+		z = lz_enter_zone(zones, "localhost.", "static",
+			LDNS_RR_CLASS_IN);
+		if(z == NULL) {
+			log_err("out of memory adding default zone");
+			return 0;
+		}
+		if(!lz_enter_rr_into_zone(z,
+			"localhost. 10800 IN NS localhost.") ||
+		   !lz_enter_rr_into_zone(z,
+			"localhost. 10800 IN SOA localhost. nobody.invalid. "
+			"1 3600 1200 604800 10800") ||
+		   !lz_enter_rr_into_zone(z,
+			"localhost. 10800 IN A 127.0.0.1") ||
+		   !lz_enter_rr_into_zone(z,
+			"localhost. 10800 IN AAAA ::1")) {
+			log_err("out of memory adding default zone");
+			lock_rw_unlock(&z->lock);
+			return 0;
+		}
+		lock_rw_unlock(&z->lock);
+	}
+
+	/* reverse ip4 zone */
+	if(!lz_exists(zones, "127.in-addr.arpa.") &&
+		!lz_nodefault(cfg, "127.in-addr.arpa.")) {
+		z = lz_enter_zone(zones, "127.in-addr.arpa.", "static",
+			LDNS_RR_CLASS_IN);
+		if(z == NULL) {
+			log_err("out of memory adding default zone");
+			return 0;
+		}
+		if(!lz_enter_rr_into_zone(z,
+			"127.in-addr.arpa. 10800 IN NS localhost.") ||
+		   !lz_enter_rr_into_zone(z,
+			"127.in-addr.arpa. 10800 IN SOA localhost. "
+			"nobody.invalid. 1 3600 1200 604800 10800") ||
+		   !lz_enter_rr_into_zone(z,
+			"1.0.0.127.in-addr.arpa. 10800 IN PTR localhost.")) {
+			log_err("out of memory adding default zone");
+			lock_rw_unlock(&z->lock);
+			return 0;
+		}
+		lock_rw_unlock(&z->lock);
+	}
+
+	/* reverse ip6 zone */
+	if(!lz_exists(zones, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa.") &&
+		!lz_nodefault(cfg, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa.")) {
+		z = lz_enter_zone(zones, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa.", "static",
+			LDNS_RR_CLASS_IN);
+		if(z == NULL) {
+			log_err("out of memory adding default zone");
+			return 0;
+		}
+		if(!lz_enter_rr_into_zone(z,
+			"1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. 10800 IN NS localhost.") ||
+		   !lz_enter_rr_into_zone(z,
+			"1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. 10800 IN SOA localhost. "
+			"nobody.invalid. 1 3600 1200 604800 10800") ||
+		   !lz_enter_rr_into_zone(z,
+			"1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. 10800 IN PTR localhost.")) {
+			log_err("out of memory adding default zone");
+			lock_rw_unlock(&z->lock);
+			return 0;
+		}
+		lock_rw_unlock(&z->lock);
+	}
+
+	/* if unblock lan-zones, then do not add the zones below.
+	 * we do add the zones above, about 127.0.0.1, because localhost is
+	 * not on the lan. */
+	if(cfg->unblock_lan_zones)
+		return 1;
+
+	/* block LAN level zones; stop at the first one that fails */
+	for(i = 0; i < sizeof(as112_zones)/sizeof(as112_zones[0]); i++) {
+		if(!add_as112_default(zones, cfg, as112_zones[i])) {
+			log_err("out of memory adding default zone");
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/**
+ * Set up the parent pointers of all zones, so that a lookup can be done
+ * for the closest match. The tree is walked in sorted order (like:
+ * . com. bla.com. zwb.com. net.), so the parent of a zone is the previous
+ * zone, or one of the ancestors of the previous zone.
+ */
+static void
+init_parents(struct local_zones* zones)
+{
+	struct local_zone* node, *prev = NULL, *cand;
+	int m;
+
+	lock_rw_wrlock(&zones->lock);
+	RBTREE_FOR(node, struct local_zone*, &zones->ztree) {
+		lock_rw_wrlock(&node->lock);
+		node->parent = NULL;
+		/* the first zone of a class has no parent */
+		if(prev && prev->dclass == node->dclass) {
+			/* m: labels shared with prev; prev is smaller */
+			(void)dname_lab_cmp(prev->name, prev->namelabs,
+				node->name, node->namelabs, &m);
+			/* go up from prev until a name with few enough
+			 * labels is found:
+			 * ==: since prev matched m, this is closest
+			 * <: prev matches more, but is not a parent,
+			 * this one is a (grand)parent */
+			cand = prev;
+			while(cand && cand->namelabs > m)
+				cand = cand->parent;
+			/* NULL if the chain ran out: no parent */
+			node->parent = cand;
+		}
+		prev = node;
+		lock_rw_unlock(&node->lock);
+	}
+	lock_rw_unlock(&zones->lock);
+}
+
+/**
+ * Enter implicit transparent zone for local-data: without local-zone:.
+ * Walks over all local-data items that have no covering zone, finds the
+ * name that covers them all (could be the root) and adds that as a
+ * transparent zone. One class is handled per pass; if items of another
+ * class remain uncovered the function calls itself for the next class.
+ * @param zones: the zones tree; parent pointers are (re)initialised here.
+ * @param cfg: config with the local-data strings.
+ * @return 1 on success, 0 on a bad local-data string, out of memory, or
+ *	failure to enter the zone.
+ */
+static int
+lz_setup_implicit(struct local_zones* zones, struct config_file* cfg)
+{
+	struct config_strlist* p;
+	int have_name = 0;
+	int have_other_classes = 0;
+	uint16_t dclass = 0;
+	uint8_t* nm = 0;	/* first uncovered name; allocated, owned here */
+	size_t nmlen = 0;
+	int nmlabs = 0;
+	int match = 0; /* number of labels match count */
+
+	init_parents(zones); /* to enable local_zones_lookup() */
+	for(p = cfg->local_data; p; p = p->next) {
+		uint8_t* rr_name;
+		uint16_t rr_class;
+		size_t len;
+		int labs;
+		if(!get_rr_nameclass(p->str, &rr_name, &rr_class)) {
+			log_err("Bad local-data RR %s", p->str);
+			/* nm can hold the name kept from an earlier item;
+			 * do not leak it (it is NULL if nothing was kept) */
+			free(nm);
+			return 0;
+		}
+		labs = dname_count_size_labels(rr_name, &len);
+		lock_rw_rdlock(&zones->lock);
+		if(!local_zones_lookup(zones, rr_name, len, labs, rr_class)) {
+			if(!have_name) {
+				/* first uncovered name: keep it (ownership of
+				 * rr_name moves to nm) and note its class */
+				dclass = rr_class;
+				nm = rr_name;
+				nmlen = len;
+				nmlabs = labs;
+				match = labs;
+				have_name = 1;
+			} else {
+				int m;
+				if(rr_class != dclass) {
+					/* process other classes later */
+					free(rr_name);
+					have_other_classes = 1;
+					lock_rw_unlock(&zones->lock);
+					continue;
+				}
+				/* find smallest shared topdomain */
+				(void)dname_lab_cmp(nm, nmlabs,
+					rr_name, labs, &m);
+				free(rr_name);
+				if(m < match)
+					match = m;
+			}
+		} else free(rr_name);
+		lock_rw_unlock(&zones->lock);
+	}
+	if(have_name) {
+		uint8_t* n2;
+		struct local_zone* z;
+		/* allocate zone of smallest shared topdomain to contain em */
+		n2 = nm;
+		dname_remove_labels(&n2, &nmlen, nmlabs - match);
+		/* n2 points inside nm; copy it before nm is freed */
+		n2 = memdup(n2, nmlen);
+		free(nm);
+		if(!n2) {
+			log_err("out of memory");
+			return 0;
+		}
+		log_nametypeclass(VERB_ALGO, "implicit transparent local-zone",
+			n2, 0, dclass);
+		if(!(z=lz_enter_zone_dname(zones, n2, nmlen, match,
+			local_zone_transparent, dclass))) {
+			return 0;
+		}
+		lock_rw_unlock(&z->lock);
+	}
+	if(have_other_classes) {
+		/* restart to setup other class */
+		return lz_setup_implicit(zones, cfg);
+	}
+	return 1;
+}
+
+/** enter every local-data: RR from the config; stops at the first failure */
+static int
+lz_enter_data(struct local_zones* zones, struct config_file* cfg)
+{
+	struct config_strlist* item = cfg->local_data;
+
+	while(item != NULL) {
+		if(!lz_enter_rr_str(zones, item->str))
+			return 0;
+		item = item->next;
+	}
+	return 1;
+}
+
+/**
+ * Free the local zone lists of the config.
+ * Each list pointer is set to NULL after its list is deleted.
+ */
+static void
+lz_freeup_cfg(struct config_file* cfg)
+{
+	/* local-zone: name and type pairs */
+	config_deldblstrlist(cfg->local_zones);
+	cfg->local_zones = NULL;
+
+	/* nodefault zone names */
+	config_delstrlist(cfg->local_zones_nodefault);
+	cfg->local_zones_nodefault = NULL;
+
+	/* local-data: RR strings */
+	config_delstrlist(cfg->local_data);
+	cfg->local_data = NULL;
+}
+
+/**
+ * Fill the local zones from the config.
+ * On success the local zone lists in cfg are freed. On failure 0 is
+ * returned at the first step that fails and cfg is not freed up.
+ */
+int
+local_zones_apply_cfg(struct local_zones* zones, struct config_file* cfg)
+{
+	/* in order: create zones from zone statements; apply default
+	 * zones+content (unless disabled, or overridden); create implicit
+	 * transparent zone from data. */
+	if(!lz_enter_zones(zones, cfg) ||
+		!lz_enter_defaults(zones, cfg) ||
+		!lz_setup_implicit(zones, cfg))
+		return 0;
+
+	/* setup parent ptrs for lookup during data entry */
+	init_parents(zones);
+	/* insert local data */
+	if(!lz_enter_data(zones, cfg))
+		return 0;
+	/* freeup memory from cfg struct. */
+	lz_freeup_cfg(cfg);
+	return 1;
+}
+
+/**
+ * Find the closest enclosing zone for a name: the zone with exactly this
+ * name, or else the nearest zone above it that the parent pointers lead to.
+ * The zones tree is not locked here; the callers in this file hold
+ * zones->lock around the call.
+ * @return the zone, or NULL if no zone of this class covers the name.
+ */
+struct local_zone*
+local_zones_lookup(struct local_zones* zones,
+	uint8_t* name, size_t len, int labs, uint16_t dclass)
+{
+	struct local_zone lookup;
+	struct local_zone* zone;
+	rbnode_t* found = NULL;
+	int m;
+
+	lookup.node.key = &lookup;
+	lookup.dclass = dclass;
+	lookup.name = name;
+	lookup.namelen = len;
+	lookup.namelabs = labs;
+
+	if(rbtree_find_less_equal(&zones->ztree, &lookup, &found)) {
+		/* exact */
+		return (struct local_zone*)found;
+	}
+	/* smaller element (or no element) */
+	zone = (struct local_zone*)found;
+	if(zone == NULL || zone->dclass != dclass)
+		return NULL;
+	/* count number of labels matched */
+	(void)dname_lab_cmp(zone->name, zone->namelabs, lookup.name,
+		lookup.namelabs, &m);
+	/* go up until qname is subdomain of zone */
+	while(zone != NULL && zone->namelabs > m)
+		zone = zone->parent;
+	return zone;
+}
+
+/** find the zone with exactly this name and class; NULL if there is none */
+struct local_zone*
+local_zones_find(struct local_zones* zones,
+	uint8_t* name, size_t len, int labs, uint16_t dclass)
+{
+	/* search key on the stack; only the compared fields are set */
+	struct local_zone lookup;
+
+	lookup.name = name;
+	lookup.namelen = len;
+	lookup.namelabs = labs;
+	lookup.dclass = dclass;
+	lookup.node.key = &lookup;
+	/* exact */
+	return (struct local_zone*)rbtree_search(&zones->ztree, &lookup);
+}
+
+/** log name, type and class of every RRset in the local zone */
+static void
+local_zone_out(struct local_zone* z)
+{
+	struct local_data* name_node;
+	struct local_rrset* rs;
+
+	RBTREE_FOR(name_node, struct local_data*, &z->data) {
+		rs = name_node->rrsets;
+		while(rs != NULL) {
+			/* type and class are kept in network order */
+			log_nametypeclass(0, "rrset", name_node->name,
+				ntohs(rs->rrset->rk.type),
+				ntohs(rs->rrset->rk.rrset_class));
+			rs = rs->next;
+		}
+	}
+}
+
+/**
+ * Log all zones: the number of zones, then for every zone a line with its
+ * type and name, followed by its RRsets.
+ */
+void local_zones_print(struct local_zones* zones)
+{
+	struct local_zone* z;
+	const char* label;
+
+	lock_rw_rdlock(&zones->lock);
+	log_info("number of auth zones %u", (unsigned)zones->ztree.count);
+	RBTREE_FOR(z, struct local_zone*, &zones->ztree) {
+		lock_rw_rdlock(&z->lock);
+		/* pick the text for the zone type */
+		switch(z->type) {
+		case local_zone_deny:
+			label = "deny zone";
+			break;
+		case local_zone_refuse:
+			label = "refuse zone";
+			break;
+		case local_zone_redirect:
+			label = "redirect zone";
+			break;
+		case local_zone_transparent:
+			label = "transparent zone";
+			break;
+		case local_zone_typetransparent:
+			label = "typetransparent zone";
+			break;
+		case local_zone_static:
+			label = "static zone";
+			break;
+		default:
+			label = "badtyped zone";
+			break;
+		}
+		log_nametypeclass(0, label, z->name, 0, z->dclass);
+		local_zone_out(z);
+		lock_rw_unlock(&z->lock);
+	}
+	lock_rw_unlock(&zones->lock);
+}
+
+/**
+ * Encode an answer consisting of 1 rrset into the buffer.
+ * @param qinfo: the query.
+ * @param edns: edns of the query; it is modified here into the edns data
+ * for the reply (advertised version and size, ext_rcode 0, only the
+ * DO bit kept).
+ * @param buf: buffer for the answer. Two 16bit values are read from it
+ * (at its start and at offset 2) and passed on to the encoder.
+ * NOTE(review): presumably the query id and flags -- confirm against
+ * the declaration of reply_info_answer_encode.
+ * @param temp: temp region for encoding.
+ * @param rrset: the rrset to put in the reply.
+ * @param ansec: if true the rrset is counted in the answer section,
+ * otherwise in the authority section.
+ * @param rcode: rcode that is or-ed into the reply flags.
+ * @return always 1. If encoding fails, a SERVFAIL error is encoded instead.
+ */
+static int
+local_encode(struct query_info* qinfo, struct edns_data* edns,
+ sldns_buffer* buf, struct regional* temp,
+ struct ub_packed_rrset_key* rrset, int ansec, int rcode)
+{
+ struct reply_info rep;
+ uint16_t udpsize;
+ /* make answer with time=0 for fixed TTL values */
+ memset(&rep, 0, sizeof(rep));
+ rep.flags = (uint16_t)((BIT_QR | BIT_AA | BIT_RA) | rcode);
+ rep.qdcount = 1;
+ if(ansec)
+ rep.an_numrrsets = 1;
+ else rep.ns_numrrsets = 1;
+ rep.rrset_count = 1;
+ /* array of one rrset: the address of the parameter itself */
+ rep.rrsets = &rrset;
+ /* save the udp size from the query before edns is overwritten */
+ udpsize = edns->udp_size;
+ edns->edns_version = EDNS_ADVERTISED_VERSION;
+ edns->udp_size = EDNS_ADVERTISED_SIZE;
+ edns->ext_rcode = 0;
+ edns->bits &= EDNS_DO;
+ if(!reply_info_answer_encode(qinfo, &rep,
+ *(uint16_t*)sldns_buffer_begin(buf),
+ sldns_buffer_read_u16_at(buf, 2),
+ buf, 0, 0, temp, udpsize, edns,
+ (int)(edns->bits&EDNS_DO), 0))
+ /* the two values are read from buf again at this point */
+ error_encode(buf, (LDNS_RCODE_SERVFAIL|BIT_AA), qinfo,
+ *(uint16_t*)sldns_buffer_begin(buf),
+ sldns_buffer_read_u16_at(buf, 2), edns);
+ return 1;
+}
+
+/**
+ * Answer from local data that matches the query name and type.
+ * For a redirect zone the data at the zone name is used and the rrset is
+ * encoded under the query name.
+ * @param z: zone for query.
+ * @param qinfo: query.
+ * @param edns: edns from query.
+ * @param buf: buffer for answer.
+ * @param temp: temp region for encoding.
+ * @param labs: label count of the query name.
+ * @param ldp: set to the local data node for the name, NULL if none.
+ * @return 1 if a reply was encoded, 0 if no name or no type matched.
+ */
+static int
+local_data_answer(struct local_zone* z, struct query_info* qinfo,
+	struct edns_data* edns, sldns_buffer* buf, struct regional* temp,
+	int labs, struct local_data** ldp)
+{
+	struct local_data lookup;
+	struct local_data* found;
+	struct local_rrset* match;
+	int redirect = (z->type == local_zone_redirect);
+
+	lookup.node.key = &lookup;
+	if(redirect) {
+		/* redirect zones only keep data at the zone name */
+		lookup.name = z->name;
+		lookup.namelen = z->namelen;
+		lookup.namelabs = z->namelabs;
+	} else {
+		lookup.name = qinfo->qname;
+		lookup.namelen = qinfo->qname_len;
+		lookup.namelabs = labs;
+	}
+	found = (struct local_data*)rbtree_search(&z->data, &lookup.node);
+	*ldp = found;
+	if(!found)
+		return 0;
+	match = local_data_find_type(found, qinfo->qtype);
+	if(!match)
+		return 0;
+	if(!redirect)
+		return local_encode(qinfo, edns, buf, temp, match->rrset, 1,
+			LDNS_RCODE_NOERROR);
+	else {
+		/* shallow copy with the owner name replaced by the query
+		 * name; like a wildcard */
+		struct ub_packed_rrset_key renamed = *match->rrset;
+		renamed.rk.dname = qinfo->qname;
+		renamed.rk.dname_len = qinfo->qname_len;
+		return local_encode(qinfo, edns, buf, temp, &renamed, 1,
+			LDNS_RCODE_NOERROR);
+	}
+}
+
+/**
+ * Encode a reply without answer data: the zone SOA in the authority
+ * section if the zone has one, otherwise a bare reply with the rcode.
+ * @return 1, a reply is in the buffer.
+ */
+static int
+lz_negative_answer(struct local_zone* z, struct query_info* qinfo,
+	struct edns_data* edns, sldns_buffer* buf, struct regional* temp,
+	int rcode)
+{
+	if(z->soa)
+		return local_encode(qinfo, edns, buf, temp,
+			z->soa, 0, rcode);
+	error_encode(buf, (rcode|BIT_AA), qinfo,
+		*(uint16_t*)sldns_buffer_begin(buf),
+		sldns_buffer_read_u16_at(buf, 2), edns);
+	return 1;
+}
+
+/**
+ * Answer according to the zone type when no local data matched.
+ * @param z: zone for query
+ * @param qinfo: query
+ * @param edns: edns from query
+ * @param buf: buffer for answer.
+ * @param temp: temp region for encoding
+ * @param ld: local data, if NULL, no such name exists in localdata.
+ * @return 1 if a reply is to be sent, 0 if not.
+ */
+static int
+lz_zone_answer(struct local_zone* z, struct query_info* qinfo,
+	struct edns_data* edns, sldns_buffer* buf, struct regional* temp,
+	struct local_data* ld)
+{
+	switch(z->type) {
+	case local_zone_deny:
+		/* no reply at all, signal caller by clearing buffer. */
+		sldns_buffer_clear(buf);
+		sldns_buffer_flip(buf);
+		return 1;
+	case local_zone_refuse:
+		error_encode(buf, (LDNS_RCODE_REFUSED|BIT_AA), qinfo,
+			*(uint16_t*)sldns_buffer_begin(buf),
+			sldns_buffer_read_u16_at(buf, 2), edns);
+		return 1;
+	case local_zone_static:
+	case local_zone_redirect:
+		/* static: nodata when the name exists, nxdomain otherwise.
+		 * redirect: nodata.
+		 * There is no additional section processing, no cname,
+		 * dname or wildcard processing, and no closest match for
+		 * NSEC or for returning a delegation downwards. */
+		return lz_negative_answer(z, qinfo, edns, buf, temp,
+			ld?LDNS_RCODE_NOERROR:LDNS_RCODE_NXDOMAIN);
+	case local_zone_typetransparent:
+		/* no NODATA or NXDOMAINS for this zone type */
+		return 0;
+	default:
+		/* everything else is handled as local_zone_transparent */
+		break;
+	}
+
+	/* transparent zone where the name exists but the type does not:
+	 * make this noerror/nodata */
+	if(ld && ld->rrsets)
+		return lz_negative_answer(z, qinfo, edns, buf, temp,
+			LDNS_RCODE_NOERROR);
+
+	/* stop here, and resolve further on */
+	return 0;
+}
+
+/* Answer a query from the local zones: exact local data first, then the
+ * zone-type specific negative handling. Takes care of locking. */
+int
+local_zones_answer(struct local_zones* zones, struct query_info* qinfo,
+	struct edns_data* edns, sldns_buffer* buf, struct regional* temp)
+{
+	int labs = dname_count_labels(qinfo->qname);
+	struct local_data* ld;
+	struct local_zone* zone;
+	int answered;
+
+	/* find the covering zone while the tree is read-locked */
+	lock_rw_rdlock(&zones->lock);
+	zone = local_zones_lookup(zones, qinfo->qname,
+		qinfo->qname_len, labs, qinfo->qclass);
+	if(!zone) {
+		lock_rw_unlock(&zones->lock);
+		return 0;
+	}
+	/* take the zone lock before the tree lock is released */
+	lock_rw_rdlock(&zone->lock);
+	lock_rw_unlock(&zones->lock);
+
+	if(local_data_answer(zone, qinfo, edns, buf, temp, labs, &ld))
+		answered = 1;
+	else
+		answered = lz_zone_answer(zone, qinfo, edns, buf, temp, ld);
+	lock_rw_unlock(&zone->lock);
+	return answered;
+}
+
+/* Map a local zone type to its constant name; "badtyped" if the value is
+ * not one of the enum constants. */
+const char* local_zone_type2str(enum localzone_type t)
+{
+	const char* str = "badtyped";
+	switch(t) {
+	case local_zone_deny:
+		str = "deny";
+		break;
+	case local_zone_refuse:
+		str = "refuse";
+		break;
+	case local_zone_redirect:
+		str = "redirect";
+		break;
+	case local_zone_transparent:
+		str = "transparent";
+		break;
+	case local_zone_typetransparent:
+		str = "typetransparent";
+		break;
+	case local_zone_static:
+		str = "static";
+		break;
+	case local_zone_nodefault:
+		str = "nodefault";
+		break;
+	}
+	return str;
+}
+
+/* Parse a zone type name. On a match *t is set and 1 is returned; on an
+ * unknown name 0 is returned and *t is left untouched. "nodefault" is not
+ * in the table and is therefore not accepted here. */
+int local_zone_str2type(const char* type, enum localzone_type* t)
+{
+	static const struct {
+		const char* str;
+		enum localzone_type tp;
+	} known[] = {
+		{ "deny", local_zone_deny },
+		{ "refuse", local_zone_refuse },
+		{ "static", local_zone_static },
+		{ "transparent", local_zone_transparent },
+		{ "typetransparent", local_zone_typetransparent },
+		{ "redirect", local_zone_redirect }
+	};
+	size_t i;
+	for(i = 0; i < sizeof(known)/sizeof(known[0]); i++) {
+		if(strcmp(type, known[i].str) == 0) {
+			*t = known[i].tp;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Walk the zones below z and repoint their parent pointer.
+ * Only children whose parent pointer equals match are changed to newp, so
+ * deeper child structures are not touched: an update of x, with a.x, b.x,
+ * f.b.x, g.b.x, c.x and y in the tree, updates a.x, b.x and c.x.
+ * Both the zones tree and z are locked already, and z must be present in
+ * the tree.
+ */
+static void
+set_kiddo_parents(struct local_zone* z, struct local_zone* match,
+	struct local_zone* newp)
+{
+	struct local_zone* kid;
+	/* in the sorted rbtree, the kiddies of z are located after z */
+	for(kid = (struct local_zone*)rbtree_next(&z->node);
+		kid != (struct local_zone*)RBTREE_NULL;
+		kid = (struct local_zone*)rbtree_next(&kid->node)) {
+		/* stop at the first entry that is not below z */
+		if(kid->dclass != z->dclass)
+			break;
+		if(!dname_strict_subdomain(kid->name, kid->namelabs,
+			z->name, z->namelabs))
+			break;
+		lock_rw_wrlock(&kid->lock);
+		if(kid->parent == match)
+			kid->parent = newp;
+		lock_rw_unlock(&kid->lock);
+	}
+}
+
+/* Create a zone and insert it into the zones tree, then repoint the parent
+ * pointers of the zones below it. The new zone is write-locked while it is
+ * linked in and unlocked again before it is returned. Returns NULL if the
+ * zone cannot be created or if an entry with the same key is already in
+ * the tree (the new zone is deleted again in that case). */
+struct local_zone* local_zones_add_zone(struct local_zones* zones,
+ uint8_t* name, size_t len, int labs, uint16_t dclass,
+ enum localzone_type tp)
+{
+ /* create */
+ struct local_zone* z = local_zone_create(name, len, labs, tp, dclass);
+ if(!z) return NULL;
+ lock_rw_wrlock(&z->lock);
+
+ /* find the closest parent */
+ /* NOTE(review): local_zones_find is the exact-match search, so this
+ * only yields non-NULL when the same name is already present, which
+ * makes the insert below fail - confirm whether a closest-enclosing
+ * lookup was intended here. */
+ z->parent = local_zones_find(zones, name, len, labs, dclass);
+
+ /* insert into the tree */
+ if(!rbtree_insert(&zones->ztree, &z->node)) {
+ /* duplicate entry! */
+ lock_rw_unlock(&z->lock);
+ local_zone_delete(z);
+ log_err("internal: duplicate entry in local_zones_add_zone");
+ return NULL;
+ }
+
+ /* set parent pointers right */
+ /* children that pointed at z's parent now point at z */
+ set_kiddo_parents(z, z->parent, z);
+
+ lock_rw_unlock(&z->lock);
+ return z;
+}
+
+/* Unlink a zone from the zones tree and delete it. Children that pointed
+ * at z are repointed to z's parent before z is removed. */
+void local_zones_del_zone(struct local_zones* zones, struct local_zone* z)
+{
+ /* fix up parents in tree */
+ lock_rw_wrlock(&z->lock);
+ set_kiddo_parents(z, z, z->parent);
+
+ /* remove from tree */
+ (void)rbtree_delete(&zones->ztree, z);
+
+ /* delete the zone */
+ /* the lock is released first, it is part of the deleted structure */
+ lock_rw_unlock(&z->lock);
+ local_zone_delete(z);
+}
+
+/* Add one RR, given as a string, to the local zones. If no zone covers the
+ * owner name a transparent zone with that name is created first. Takes the
+ * zones lock itself. Returns 0 if the name and class cannot be obtained
+ * from the string or if the zone cannot be created, otherwise the result
+ * of lz_enter_rr_into_zone. */
+int
+local_zones_add_RR(struct local_zones* zones, const char* rr)
+{
+ uint8_t* rr_name;
+ uint16_t rr_class;
+ size_t len;
+ int labs;
+ struct local_zone* z;
+ int r;
+ if(!get_rr_nameclass(rr, &rr_name, &rr_class)) {
+ return 0;
+ }
+ labs = dname_count_size_labels(rr_name, &len);
+ /* could first try readlock then get writelock if zone does not exist,
+ * but we do not add enough RRs (from multiple threads) to optimize */
+ lock_rw_wrlock(&zones->lock);
+ z = local_zones_lookup(zones, rr_name, len, labs, rr_class);
+ if(!z) {
+ /* rr_name is handed to the new zone; presumably the zone takes
+ * ownership of it, also on failure - TODO confirm */
+ z = local_zones_add_zone(zones, rr_name, len, labs, rr_class,
+ local_zone_transparent);
+ if(!z) {
+ lock_rw_unlock(&zones->lock);
+ return 0;
+ }
+ } else {
+ /* a covering zone exists, the name was only needed for lookup */
+ free(rr_name);
+ }
+ /* take the zone lock before the tree lock is released */
+ lock_rw_wrlock(&z->lock);
+ lock_rw_unlock(&zones->lock);
+ r = lz_enter_rr_into_zone(z, rr);
+ lock_rw_unlock(&z->lock);
+ return r;
+}
+
+/** true when no deeper domain name than d exists in the tree */
+static int
+is_terminal(struct local_data* d)
+{
+	/* deeper names are sorted right after their ancestor, so looking
+	 * at the successor in the tree is enough */
+	struct local_data* succ = (struct local_data*)rbtree_next(&d->node);
+	/* terminal if d is last in the tree, or if the successor is not
+	 * below d */
+	return succ == (struct local_data*)RBTREE_NULL ||
+		!dname_strict_subdomain(succ->name, succ->namelabs,
+			d->name, d->namelabs);
+}
+
+/**
+ * Remove d from the zone data tree if it holds no rrsets and nothing is
+ * below it, then repeat for the name with one label less, up to the root.
+ */
+static void
+del_empty_term(struct local_zone* z, struct local_data* d,
+	uint8_t* name, size_t len, int labs)
+{
+	for(;;) {
+		/* stop at a missing node, a node with data, or a node that
+		 * still has deeper names */
+		if(!d || d->rrsets != NULL || !is_terminal(d))
+			return;
+		/* note, no memory recycling in zone region */
+		(void)rbtree_delete(&z->data, d);
+
+		/* the root has no label left to strip */
+		if(dname_is_root(name))
+			return;
+		/* go up and to the next label */
+		dname_remove_label(&name, &len);
+		labs--;
+		d = lz_find_node(z, name, len, labs);
+	}
+}
+
+/* Drop all rrsets at a name. No effect if no zone covers the name or the
+ * name is not in the zone data. Takes care of locking. */
+void local_zones_del_data(struct local_zones* zones,
+	uint8_t* name, size_t len, int labs, uint16_t dclass)
+{
+	struct local_zone* zone;
+	struct local_data* node;
+
+	/* find the zone; lock it before the tree lock is released */
+	lock_rw_rdlock(&zones->lock);
+	zone = local_zones_lookup(zones, name, len, labs, dclass);
+	if(zone)
+		lock_rw_wrlock(&zone->lock);
+	lock_rw_unlock(&zones->lock);
+	if(!zone)
+		return; /* no such zone, we're done */
+
+	/* find the domain */
+	node = lz_find_node(zone, name, len, labs);
+	if(!node) {
+		lock_rw_unlock(&zone->lock);
+		return;
+	}
+
+	/* no memory recycling for zone deletions ... */
+	node->rrsets = NULL;
+	/* data at the zone name went away, so did the soa record */
+	if(query_dname_compare(node->name, zone->name) == 0)
+		zone->soa = NULL;
+
+	/* cleanup the empty nonterminals for this name */
+	del_empty_term(zone, node, name, len, labs);
+
+	lock_rw_unlock(&zone->lock);
+}
diff --git a/external/unbound/services/localzone.h b/external/unbound/services/localzone.h
new file mode 100644
index 000000000..788fbfb3b
--- /dev/null
+++ b/external/unbound/services/localzone.h
@@ -0,0 +1,317 @@
+/*
+ * services/localzone.h - local zones authority service.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains functions to enable local zone authority service.
+ */
+
+#ifndef SERVICES_LOCALZONE_H
+#define SERVICES_LOCALZONE_H
+#include "util/rbtree.h"
+#include "util/locks.h"
+struct ub_packed_rrset_key;
+struct regional;
+struct config_file;
+struct edns_data;
+struct query_info;
+struct sldns_buffer;
+
+/**
+ * Local zone type
+ * This type determines processing for queries that did not match
+ * local-data directly.
+ */
+enum localzone_type {
+ /** drop query */
+ local_zone_deny = 0,
+ /** answer with error */
+ local_zone_refuse,
+ /** answer nxdomain or nodata */
+ local_zone_static,
+ /** resolve normally */
+ local_zone_transparent,
+ /** do not block types at localdata names */
+ local_zone_typetransparent,
+ /** answer with data at zone apex */
+ local_zone_redirect,
+ /** remove default AS112 blocking contents for zone
+ * nodefault is used in config not during service. */
+ local_zone_nodefault
+};
+
+/**
+ * Authoritative local zones storage, shared.
+ */
+struct local_zones {
+ /** lock on the localzone tree */
+ lock_rw_t lock;
+ /** rbtree of struct local_zone */
+ rbtree_t ztree;
+};
+
+/**
+ * Local zone. A locally served authoritative zone.
+ */
+struct local_zone {
+ /** rbtree node, key is name and class */
+ rbnode_t node;
+ /** parent zone, if any. */
+ struct local_zone* parent;
+
+ /** zone name, in uncompressed wireformat */
+ uint8_t* name;
+ /** length of zone name */
+ size_t namelen;
+ /** number of labels in zone name */
+ int namelabs;
+ /** the class of this zone.
+ * uses 'dclass' to not conflict with c++ keyword class. */
+ uint16_t dclass;
+
+ /** lock on the data in the structure
+ * For the node, parent, name, namelen, namelabs, dclass, you
+ * need to also hold the zones_tree lock to change them (or to
+ * delete this zone) */
+ lock_rw_t lock;
+
+ /** how to process zone */
+ enum localzone_type type;
+
+ /** in this region the zone's data is allocated.
+ * the struct local_zone itself is malloced. */
+ struct regional* region;
+ /** local data for this zone
+ * rbtree of struct local_data */
+ rbtree_t data;
+ /** if data contains zone apex SOA data, this is a ptr to it. */
+ struct ub_packed_rrset_key* soa;
+};
+
+/**
+ * Local data. One domain name, and the RRs to go with it.
+ */
+struct local_data {
+ /** rbtree node, key is name only */
+ rbnode_t node;
+ /** domain name */
+ uint8_t* name;
+ /** length of name */
+ size_t namelen;
+ /** number of labels in name */
+ int namelabs;
+ /** the data rrsets, with different types, linked list.
+ * If this list is NULL, the node is an empty non-terminal. */
+ struct local_rrset* rrsets;
+};
+
+/**
+ * A local data RRset
+ */
+struct local_rrset {
+ /** next in list */
+ struct local_rrset* next;
+ /** RRset data item */
+ struct ub_packed_rrset_key* rrset;
+};
+
+/**
+ * Create local zones storage
+ * @return new struct or NULL on error.
+ */
+struct local_zones* local_zones_create(void);
+
+/**
+ * Delete local zones storage
+ * @param zones: to delete.
+ */
+void local_zones_delete(struct local_zones* zones);
+
+/**
+ * Apply config settings; setup the local authoritative data.
+ * Takes care of locking.
+ * @param zones: is set up.
+ * @param cfg: config data.
+ * @return false on error.
+ */
+int local_zones_apply_cfg(struct local_zones* zones, struct config_file* cfg);
+
+/**
+ * Compare two local_zone entries in rbtree. Sort hierarchical but not
+ * canonical
+ * @param z1: zone 1
+ * @param z2: zone 2
+ * @return: -1, 0, +1 comparison value.
+ */
+int local_zone_cmp(const void* z1, const void* z2);
+
+/**
+ * Compare two local_data entries in rbtree. Sort canonical.
+ * @param d1: data 1
+ * @param d2: data 2
+ * @return: -1, 0, +1 comparison value.
+ */
+int local_data_cmp(const void* d1, const void* d2);
+
+/**
+ * Delete one zone
+ * @param z: to delete.
+ */
+void local_zone_delete(struct local_zone* z);
+
+/**
+ * Lookup zone that contains the given name, class.
+ * User must lock the tree or result zone.
+ * @param zones: the zones tree
+ * @param name: dname to lookup
+ * @param len: length of name.
+ * @param labs: labelcount of name.
+ * @param dclass: class to lookup.
+ * @return closest local_zone or NULL if no covering zone is found.
+ */
+struct local_zone* local_zones_lookup(struct local_zones* zones,
+ uint8_t* name, size_t len, int labs, uint16_t dclass);
+
+/**
+ * Debug helper. Print all zones
+ * Takes care of locking.
+ * @param zones: the zones tree
+ */
+void local_zones_print(struct local_zones* zones);
+
+/**
+ * Answer authoritatively for local zones.
+ * Takes care of locking.
+ * @param zones: the stored zones (shared, read only).
+ * @param qinfo: query info (parsed).
+ * @param edns: edns info (parsed).
+ * @param buf: buffer with query ID and flags, also for reply.
+ * @param temp: temporary storage region.
+ * @return true if answer is in buffer. false if query is not answered
+ * by authority data. If the reply should be dropped altogether, the return
+ * value is true, but the buffer is cleared (empty).
+ */
+int local_zones_answer(struct local_zones* zones, struct query_info* qinfo,
+ struct edns_data* edns, struct sldns_buffer* buf, struct regional* temp);
+
+/**
+ * Parse the string into localzone type.
+ *
+ * @param str: string to parse
+ * @param t: local zone type returned here.
+ * @return 0 on parse error.
+ */
+int local_zone_str2type(const char* str, enum localzone_type* t);
+
+/**
+ * Print localzone type to a string. Pointer to a constant string.
+ *
+ * @param t: local zone type.
+ * @return constant string that describes type.
+ */
+const char* local_zone_type2str(enum localzone_type t);
+
+/**
+ * Find the zone with exactly the given name and class.
+ * User must lock the tree or result zone.
+ * @param zones: the zones tree
+ * @param name: dname to lookup
+ * @param len: length of name.
+ * @param labs: labelcount of name.
+ * @param dclass: class to lookup.
+ * @return the exact local_zone or NULL.
+ */
+struct local_zone* local_zones_find(struct local_zones* zones,
+ uint8_t* name, size_t len, int labs, uint16_t dclass);
+
+/**
+ * Add a new zone. Caller must hold the zones lock.
+ * Adjusts the other zones as well (parent pointers) after insertion.
+ * The zone must NOT exist (returns NULL and logs error).
+ * @param zones: the zones tree
+ * @param name: dname to add
+ * @param len: length of name.
+ * @param labs: labelcount of name.
+ * @param dclass: class to add.
+ * @param tp: type.
+ * @return local_zone or NULL on error, caller must printout memory error.
+ */
+struct local_zone* local_zones_add_zone(struct local_zones* zones,
+ uint8_t* name, size_t len, int labs, uint16_t dclass,
+ enum localzone_type tp);
+
+/**
+ * Delete a zone. Caller must hold the zones lock.
+ * Adjusts the other zones as well (parent pointers) as part of the deletion.
+ * @param zones: the zones tree
+ * @param zone: the zone to delete from tree. Also deletes zone from memory.
+ */
+void local_zones_del_zone(struct local_zones* zones, struct local_zone* zone);
+
+/**
+ * Add RR data into the localzone data.
+ * Looks up the zone, if no covering zone, a transparent zone with the
+ * name of the RR is created.
+ * @param zones: the zones tree. Not locked by caller.
+ * @param rr: string with one RR.
+ * @return false on failure.
+ */
+int local_zones_add_RR(struct local_zones* zones, const char* rr);
+
+/**
+ * Remove data from domain name in the tree.
+ * All types are removed. No effect if zone or name does not exist.
+ * @param zones: zones tree.
+ * @param name: dname to remove
+ * @param len: length of name.
+ * @param labs: labelcount of name.
+ * @param dclass: class to remove.
+ */
+void local_zones_del_data(struct local_zones* zones,
+ uint8_t* name, size_t len, int labs, uint16_t dclass);
+
+
+/**
+ * Form wireformat from text format domain name.
+ * @param str: the domain name in text "www.example.com"
+ * @param res: resulting wireformat is stored here with malloc.
+ * @param len: length of resulting wireformat.
+ * @param labs: number of labels in resulting wireformat.
+ * @return false on error, syntax or memory. Also logged.
+ */
+int parse_dname(const char* str, uint8_t** res, size_t* len, int* labs);
+
+#endif /* SERVICES_LOCALZONE_H */
diff --git a/external/unbound/services/mesh.c b/external/unbound/services/mesh.c
new file mode 100644
index 000000000..bc711d9b3
--- /dev/null
+++ b/external/unbound/services/mesh.c
@@ -0,0 +1,1209 @@
+/*
+ * services/mesh.c - deal with mesh of query states and handle events for that.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains functions to assist in dealing with a mesh of
+ * query states. This mesh is supposed to be thread-specific.
+ * It consists of query states (per qname, qtype, qclass) and connections
+ * between query states and the super and subquery states, and replies to
+ * send back to clients.
+ */
+#include "config.h"
+#include "services/mesh.h"
+#include "services/outbound_list.h"
+#include "services/cache/dns.h"
+#include "util/log.h"
+#include "util/net_help.h"
+#include "util/module.h"
+#include "util/regional.h"
+#include "util/data/msgencode.h"
+#include "util/timehist.h"
+#include "util/fptr_wlist.h"
+#include "util/alloc.h"
+#include "util/config_file.h"
+#include "ldns/sbuffer.h"
+
+/** store end - start in d, borrowing a second when the usec part of end
+ * is smaller than that of start */
+static void
+timeval_subtract(struct timeval* d, const struct timeval* end, const struct timeval* start)
+{
+#ifndef S_SPLINT_S
+	time_t usec = end->tv_usec;
+	time_t borrow = 0;
+	if(usec < start->tv_usec) {
+		/* borrow one second for the microsecond subtraction */
+		usec += 1000000;
+		borrow = 1;
+	}
+	d->tv_sec = end->tv_sec - start->tv_sec - borrow;
+	d->tv_usec = usec - start->tv_usec;
+#endif
+}
+
+/**
+ * Add a time value to an accumulator.
+ * @param d: accumulator, add is added to it in place.
+ * @param add: value to add.
+ * At most one second is carried from tv_usec into tv_sec, so both inputs
+ * are expected to have tv_usec below 1000000.
+ */
+static void
+timeval_add(struct timeval* d, const struct timeval* add)
+{
+#ifndef S_SPLINT_S
+	d->tv_sec += add->tv_sec;
+	d->tv_usec += add->tv_usec;
+	/* carry at exactly one second as well: tv_usec has to stay below
+	 * 1000000 for the value to be normalized */
+	if(d->tv_usec >= 1000000 ) {
+		d->tv_usec -= 1000000;
+		d->tv_sec++;
+	}
+#endif
+}
+
+/**
+ * Divide sum of timers to get average.
+ * @param avg: result is stored here; set to zero if d is 0.
+ * @param sum: the sum to divide.
+ * @param d: number of items in the sum.
+ */
+static void
+timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d)
+{
+#ifndef S_SPLINT_S
+ size_t leftover;
+ /* avoid division by zero */
+ if(d == 0) {
+ avg->tv_sec = 0;
+ avg->tv_usec = 0;
+ return;
+ }
+ avg->tv_sec = sum->tv_sec / d;
+ avg->tv_usec = sum->tv_usec / d;
+ /* handle fraction from seconds divide */
+ /* leftover is the remainder of the seconds division (below d);
+ * it is converted to microseconds and spread over the d items */
+ leftover = sum->tv_sec - avg->tv_sec*d;
+ avg->tv_usec += (leftover*1000000)/d;
+#endif
+}
+
+/** histogram compare of time values; true when x is smaller than or
+ * equal to y */
+static int
+timeval_smaller(const struct timeval* x, const struct timeval* y)
+{
+#ifndef S_SPLINT_S
+	/* seconds decide; on a tie the microseconds decide */
+	return x->tv_sec < y->tv_sec ||
+		(x->tv_sec == y->tv_sec && x->tv_usec <= y->tv_usec);
+#endif
+}
+
+/* Order mesh states: priming states first, then states with the RD flag,
+ * then states with the CD flag; ties are decided by query_info_compare. */
+int
+mesh_state_compare(const void* ap, const void* bp)
+{
+	struct mesh_state* a = (struct mesh_state*)ap;
+	struct mesh_state* b = (struct mesh_state*)bp;
+	int a_set, b_set;
+
+	/* for every criterion: the state that has it set sorts first */
+	a_set = a->s.is_priming ? 1 : 0;
+	b_set = b->s.is_priming ? 1 : 0;
+	if(a_set != b_set)
+		return a_set ? -1 : 1;
+
+	a_set = (a->s.query_flags&BIT_RD) ? 1 : 0;
+	b_set = (b->s.query_flags&BIT_RD) ? 1 : 0;
+	if(a_set != b_set)
+		return a_set ? -1 : 1;
+
+	a_set = (a->s.query_flags&BIT_CD) ? 1 : 0;
+	b_set = (b->s.query_flags&BIT_CD) ? 1 : 0;
+	if(a_set != b_set)
+		return a_set ? -1 : 1;
+
+	return query_info_compare(&a->s.qinfo, &b->s.qinfo);
+}
+
+/* Compare two mesh_state_ref entries by the mesh states they refer to. */
+int
+mesh_state_ref_compare(const void* ap, const void* bp)
+{
+	return mesh_state_compare(((struct mesh_state_ref*)ap)->s,
+		((struct mesh_state_ref*)bp)->s);
+}
+
+/**
+ * Allocate and initialize a mesh area.
+ * @param stack: module stack, copied into the mesh.
+ * @param env: module environment; the config in it supplies the buffer
+ *	size, the number of queries per thread and the jostle time.
+ * @return new mesh or NULL on allocation failure (logged). On failure
+ *	nothing stays allocated.
+ */
+struct mesh_area*
+mesh_create(struct module_stack* stack, struct module_env* env)
+{
+	struct mesh_area* mesh = calloc(1, sizeof(struct mesh_area));
+	if(!mesh) {
+		log_err("mesh area alloc: out of memory");
+		return NULL;
+	}
+	mesh->histogram = timehist_setup();
+	mesh->qbuf_bak = sldns_buffer_new(env->cfg->msg_buffer_size);
+	if(!mesh->histogram || !mesh->qbuf_bak) {
+		/* one of the two may have been allocated; release it so
+		 * that it is not leaked together with the mesh */
+		if(mesh->histogram)
+			timehist_delete(mesh->histogram);
+		if(mesh->qbuf_bak)
+			sldns_buffer_free(mesh->qbuf_bak);
+		free(mesh);
+		log_err("mesh area alloc: out of memory");
+		return NULL;
+	}
+	mesh->mods = *stack;
+	mesh->env = env;
+	rbtree_init(&mesh->run, &mesh_state_compare);
+	rbtree_init(&mesh->all, &mesh_state_compare);
+	mesh->num_reply_addrs = 0;
+	mesh->num_reply_states = 0;
+	mesh->num_detached_states = 0;
+	mesh->num_forever_states = 0;
+	mesh->stats_jostled = 0;
+	mesh->stats_dropped = 0;
+	mesh->max_reply_states = env->cfg->num_queries_per_thread;
+	/* half of the reply states, rounded up, may stay forever */
+	mesh->max_forever_states = (mesh->max_reply_states+1)/2;
+#ifndef S_SPLINT_S
+	/* jostle_time is split into whole seconds and the remainder,
+	 * scaled by 1000 for tv_usec (so it is taken to be in msec) */
+	mesh->jostle_max.tv_sec = (time_t)(env->cfg->jostle_time / 1000);
+	mesh->jostle_max.tv_usec = (time_t)((env->cfg->jostle_time % 1000)
+		*1000);
+#endif
+	return mesh;
+}
+
+/** helper for mesh delete; deletes the mesh state stored at a tree node */
+static void
+mesh_delete_helper(rbnode_t* n)
+{
+ struct mesh_state* mstate = (struct mesh_state*)n->key;
+ /* perform a full delete, not only 'cleanup' routine,
+ * because other callbacks expect a clean state in the mesh.
+ * For 're-entrant' calls */
+ mesh_state_delete(&mstate->s);
+ /* but because these delete the items from the tree, postorder
+ * traversal and rbtree rebalancing do not work together */
+}
+
+/* Delete the mesh area with all its query states, the histogram and the
+ * backup buffer. A NULL mesh is ignored. */
+void
+mesh_delete(struct mesh_area* mesh)
+{
+	if(mesh == NULL)
+		return;
+	/* free all query states: keep deleting the state at the root
+	 * until the tree is empty */
+	while(mesh->all.count != 0) {
+		rbnode_t* top = mesh->all.root;
+		mesh_delete_helper(top);
+	}
+	timehist_delete(mesh->histogram);
+	sldns_buffer_free(mesh->qbuf_bak);
+	free(mesh);
+}
+
+/* Delete all query states but keep the mesh area itself, reset to empty.
+ * Unlike mesh_delete, mesh is not checked for NULL here. */
+void
+mesh_delete_all(struct mesh_area* mesh)
+{
+ /* free all query states */
+ while(mesh->all.count)
+ mesh_delete_helper(mesh->all.root);
+ /* whatever is still counted as a reply address is counted as dropped;
+ * this has to happen before the counter is reset below */
+ mesh->stats_dropped += mesh->num_reply_addrs;
+ /* clear mesh area references */
+ rbtree_init(&mesh->run, &mesh_state_compare);
+ rbtree_init(&mesh->all, &mesh_state_compare);
+ mesh->num_reply_addrs = 0;
+ mesh->num_reply_states = 0;
+ mesh->num_detached_states = 0;
+ mesh->num_forever_states = 0;
+ mesh->forever_first = NULL;
+ mesh->forever_last = NULL;
+ mesh->jostle_first = NULL;
+ mesh->jostle_last = NULL;
+}
+
+/* Make room for a new reply state. Returns 1 if there is room, or if the
+ * first state on the jostle list was old enough to be deleted to make
+ * room; returns 0 if there is no space. If qbuf is not NULL its contents
+ * are saved in mesh->qbuf_bak and restored around the delete. */
+int mesh_make_new_space(struct mesh_area* mesh, sldns_buffer* qbuf)
+{
+ struct mesh_state* m = mesh->jostle_first;
+ /* free space is available */
+ if(mesh->num_reply_states < mesh->max_reply_states)
+ return 1;
+ /* try to kick out a jostle-list item */
+ if(m && m->reply_list && m->list_select == mesh_jostle_list) {
+ /* how old is it? */
+ /* measured from the start time of the first entry in its
+ * reply list */
+ struct timeval age;
+ timeval_subtract(&age, mesh->env->now_tv,
+ &m->reply_list->start_time);
+ /* jostle only if the age is at least jostle_max */
+ if(timeval_smaller(&mesh->jostle_max, &age)) {
+ /* its a goner */
+ log_nametypeclass(VERB_ALGO, "query jostled out to "
+ "make space for a new one",
+ m->s.qinfo.qname, m->s.qinfo.qtype,
+ m->s.qinfo.qclass);
+ /* backup the query */
+ if(qbuf) sldns_buffer_copy(mesh->qbuf_bak, qbuf);
+ /* notify supers */
+ if(m->super_set.count > 0) {
+ verbose(VERB_ALGO, "notify supers of failure");
+ m->s.return_msg = NULL;
+ m->s.return_rcode = LDNS_RCODE_SERVFAIL;
+ mesh_walk_supers(mesh, m);
+ }
+ mesh->stats_jostled ++;
+ mesh_state_delete(&m->s);
+ /* restore the query - note that the qinfo ptr to
+ * the querybuffer is then correct again. */
+ if(qbuf) sldns_buffer_copy(qbuf, mesh->qbuf_bak);
+ return 1;
+ }
+ }
+ /* no space for new item */
+ return 0;
+}
+
+/* Attach a client query to the mesh: find or create the mesh state for the
+ * query, add the reply address to it, update the counters, put the state
+ * on the forever or jostle list, and run the modules if the state is new.
+ * The query is dropped if there is no room for it; on an out of memory
+ * condition a SERVFAIL reply is sent. */
+void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
+ uint16_t qflags, struct edns_data* edns, struct comm_reply* rep,
+ uint16_t qid)
+{
+ /* do not use CD flag from user for mesh state, we want the CD-query
+ * to receive validation anyway, to protect our cache contents and
+ * avoid bad-data in this cache that a downstream validator cannot
+ * remove from this cache */
+ struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0);
+ int was_detached = 0;
+ int was_noreply = 0;
+ int added = 0;
+ /* does this create a new reply state? */
+ if(!s || s->list_select == mesh_no_list) {
+ if(!mesh_make_new_space(mesh, rep->c->buffer)) {
+ verbose(VERB_ALGO, "Too many queries. dropping "
+ "incoming query.");
+ comm_point_drop_reply(rep);
+ mesh->stats_dropped ++;
+ return;
+ }
+ /* for this new reply state, the reply address is free,
+ * so the limit of reply addresses does not stop reply states*/
+ } else {
+ /* protect our memory usage from storing reply addresses */
+ if(mesh->num_reply_addrs > mesh->max_reply_states*16) {
+ verbose(VERB_ALGO, "Too many requests queued. "
+ "dropping incoming query.");
+ mesh->stats_dropped++;
+ comm_point_drop_reply(rep);
+ return;
+ }
+ }
+ /* see if it already exists, if not, create one */
+ if(!s) {
+#ifdef UNBOUND_DEBUG
+ struct rbnode_t* n;
+#endif
+ s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0);
+ if(!s) {
+ log_err("mesh_state_create: out of memory; SERVFAIL");
+ error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL,
+ qinfo, qid, qflags, edns);
+ comm_point_send_reply(rep);
+ return;
+ }
+ /* the preprocessor lines below form one statement with the
+ * rbtree_insert call: in debug builds its result is stored in n
+ * for the assertion, otherwise it is discarded */
+#ifdef UNBOUND_DEBUG
+ n =
+#else
+ (void)
+#endif
+ rbtree_insert(&mesh->all, &s->node);
+ log_assert(n != NULL);
+ /* set detached (it is now) */
+ mesh->num_detached_states++;
+ added = 1;
+ }
+ /* record the state before the reply is added, for the counters below */
+ if(!s->reply_list && !s->cb_list && s->super_set.count == 0)
+ was_detached = 1;
+ if(!s->reply_list && !s->cb_list)
+ was_noreply = 1;
+ /* add reply to s */
+ if(!mesh_state_add_reply(s, edns, rep, qid, qflags, qinfo->qname)) {
+ log_err("mesh_new_client: out of memory; SERVFAIL");
+ error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL,
+ qinfo, qid, qflags, edns);
+ comm_point_send_reply(rep);
+ /* only remove the state if it was created by this call */
+ if(added)
+ mesh_state_delete(&s->s);
+ return;
+ }
+ /* update statistics */
+ if(was_detached) {
+ log_assert(mesh->num_detached_states > 0);
+ mesh->num_detached_states--;
+ }
+ if(was_noreply) {
+ mesh->num_reply_states ++;
+ }
+ mesh->num_reply_addrs++;
+ if(s->list_select == mesh_no_list) {
+ /* move to either the forever or the jostle_list */
+ if(mesh->num_forever_states < mesh->max_forever_states) {
+ mesh->num_forever_states ++;
+ mesh_list_insert(s, &mesh->forever_first,
+ &mesh->forever_last);
+ s->list_select = mesh_forever_list;
+ } else {
+ mesh_list_insert(s, &mesh->jostle_first,
+ &mesh->jostle_last);
+ s->list_select = mesh_jostle_list;
+ }
+ }
+ /* only a newly created state is started here */
+ if(added)
+ mesh_run(mesh, s, module_event_new, NULL);
+}
+
+/**
+ * Attach a callback to the mesh state for a query, creating and starting
+ * that state when it does not exist yet.
+ * Only the RD bit of qflags is used to find or create the state.
+ * @return 1 when the callback was added, 0 when creating the state or
+ *	adding the callback failed.
+ */
+int
+mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo,
+	uint16_t qflags, struct edns_data* edns, sldns_buffer* buf,
+	uint16_t qid, mesh_cb_func_t cb, void* cb_arg)
+{
+	struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0);
+	int created = 0;
+	int had_no_clients;
+	int had_no_users;
+	/* there are no limits on the number of callbacks */
+
+	if(s == NULL) {
+		/* no state for this query yet, make one */
+#ifdef UNBOUND_DEBUG
+		struct rbnode_t* n;
+#endif
+		s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0);
+		if(s == NULL)
+			return 0;
+#ifdef UNBOUND_DEBUG
+		n =
+#else
+		(void)
+#endif
+		rbtree_insert(&mesh->all, &s->node);
+		log_assert(n != NULL);
+		/* a fresh state starts out detached */
+		mesh->num_detached_states++;
+		created = 1;
+	}
+	/* remember the situation before the callback is attached */
+	had_no_clients = (!s->reply_list && !s->cb_list);
+	had_no_users = (had_no_clients && s->super_set.count == 0);
+	if(!mesh_state_add_cb(s, edns, buf, cb, cb_arg, qid, qflags)) {
+		/* only throw away a state that was made in this call */
+		if(created)
+			mesh_state_delete(&s->s);
+		return 0;
+	}
+	/* bring the counters in line with the new callback */
+	if(had_no_users) {
+		log_assert(mesh->num_detached_states > 0);
+		mesh->num_detached_states--;
+	}
+	if(had_no_clients)
+		mesh->num_reply_states ++;
+	mesh->num_reply_addrs++;
+	if(created)
+		mesh_run(mesh, s, module_event_new, NULL);
+	return 1;
+}
+
+/**
+ * Start a prefetch for a query.
+ * An existing state gets a blacklist entry (if it has none) and its
+ * prefetch_leeway raised to leeway if that is larger. Otherwise a new
+ * state is created with a blacklist entry, put on the forever or jostle
+ * list and run with module_event_new.
+ */
+void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo,
+	uint16_t qflags, time_t leeway)
+{
+	struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0);
+#ifdef UNBOUND_DEBUG
+	struct rbnode_t* n;
+#endif
+	if(s != NULL) {
+		/* already exists, and for a different purpose perhaps.
+		 * if mesh_no_list, keep it that way.
+		 * make it ignore the cache from now on */
+		if(s->s.blacklist == NULL)
+			sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
+		if(leeway > s->s.prefetch_leeway)
+			s->s.prefetch_leeway = leeway;
+		return;
+	}
+	if(!mesh_make_new_space(mesh, NULL)) {
+		verbose(VERB_ALGO, "Too many queries. dropped prefetch.");
+		mesh->stats_dropped ++;
+		return;
+	}
+	s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0);
+	if(s == NULL) {
+		log_err("prefetch mesh_state_create: out of memory");
+		return;
+	}
+#ifdef UNBOUND_DEBUG
+	n =
+#else
+	(void)
+#endif
+	rbtree_insert(&mesh->all, &s->node);
+	log_assert(n != NULL);
+	/* the new state has no users, so it counts as detached */
+	mesh->num_detached_states++;
+	/* make it ignore the cache */
+	sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
+	s->s.prefetch_leeway = leeway;
+
+	if(s->list_select == mesh_no_list) {
+		/* jostle list when the forever list is full, else forever */
+		if(mesh->num_forever_states >= mesh->max_forever_states) {
+			mesh_list_insert(s, &mesh->jostle_first,
+				&mesh->jostle_last);
+			s->list_select = mesh_jostle_list;
+		} else {
+			mesh->num_forever_states ++;
+			mesh_list_insert(s, &mesh->forever_first,
+				&mesh->forever_last);
+			s->list_select = mesh_forever_list;
+		}
+	}
+	mesh_run(mesh, s, module_event_new, NULL);
+}
+
+/**
+ * Hand the outcome of an outbound query to the mesh state that sent it.
+ * The reply is stored in the query state and the state is run with
+ * module_event_reply for NETEVENT_NOERROR, module_event_capsfail for
+ * NETEVENT_CAPSFAIL and module_event_noreply for anything else.
+ */
+void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e,
+	struct comm_reply* reply, int what)
+{
+	enum module_ev event;
+	e->qstate->reply = reply;
+	if(what == NETEVENT_NOERROR)
+		event = module_event_reply;
+	else if(what == NETEVENT_CAPSFAIL)
+		event = module_event_capsfail;
+	else
+		event = module_event_noreply;
+	mesh_run(mesh, e->qstate->mesh_info, event, e);
+}
+
+/**
+ * Allocate and initialise a mesh state inside a freshly obtained region.
+ * The state itself and a copy of the query name live in that region.
+ * Only the RD and CD bits of qflags are stored.
+ * @return the new state, or NULL when the region, the state or the
+ *	query name copy could not be allocated (the region is released).
+ */
+struct mesh_state*
+mesh_state_create(struct module_env* env, struct query_info* qinfo,
+	uint16_t qflags, int prime)
+{
+	struct regional* region = alloc_reg_obtain(env->alloc);
+	struct mesh_state* mstate;
+	int modidx;
+	if(region == NULL)
+		return NULL;
+	mstate = (struct mesh_state*)regional_alloc(region, sizeof(*mstate));
+	if(mstate == NULL) {
+		alloc_reg_release(env->alloc, region);
+		return NULL;
+	}
+	memset(mstate, 0, sizeof(*mstate));
+
+	/* tree nodes, both keyed on the state itself */
+	mstate->node = *RBTREE_NULL;
+	mstate->run_node = *RBTREE_NULL;
+	mstate->node.key = mstate;
+	mstate->run_node.key = mstate;
+
+	/* mesh bookkeeping */
+	mstate->reply_list = NULL;
+	mstate->list_select = mesh_no_list;
+	mstate->replies_sent = 0;
+	rbtree_init(&mstate->super_set, &mesh_state_ref_compare);
+	rbtree_init(&mstate->sub_set, &mesh_state_ref_compare);
+	mstate->num_activated = 0;
+
+	/* query info for the module qstate, with its own copy of the name */
+	mstate->s.qinfo.qtype = qinfo->qtype;
+	mstate->s.qinfo.qclass = qinfo->qclass;
+	mstate->s.qinfo.qname_len = qinfo->qname_len;
+	mstate->s.qinfo.qname = regional_alloc_init(region, qinfo->qname,
+		qinfo->qname_len);
+	if(mstate->s.qinfo.qname == NULL) {
+		alloc_reg_release(env->alloc, region);
+		return NULL;
+	}
+
+	/* remove all weird bits from qflags */
+	mstate->s.query_flags = (qflags & (BIT_RD|BIT_CD));
+	mstate->s.is_priming = prime;
+	mstate->s.reply = NULL;
+	mstate->s.region = region;
+	mstate->s.curmod = 0;
+	mstate->s.return_msg = 0;
+	mstate->s.return_rcode = LDNS_RCODE_NOERROR;
+	mstate->s.env = env;
+	mstate->s.mesh_info = mstate;
+	mstate->s.prefetch_leeway = 0;
+
+	/* every module starts without private data, in the initial state */
+	for(modidx = 0; modidx < env->mesh->mods.num; modidx++) {
+		mstate->s.minfo[modidx] = NULL;
+		mstate->s.ext_state[modidx] = module_state_initial;
+	}
+	return mstate;
+}
+
+/**
+ * Release what a mesh state holds, without touching the mesh trees.
+ * If the replies were not sent yet, every pending reply is dropped and
+ * every pending callback is called with SERVFAIL. All modules get their
+ * clear function called and the region of the state is released.
+ * @param mstate: state to clean up; NULL is a no-op.
+ */
+void
+mesh_state_cleanup(struct mesh_state* mstate)
+{
+	struct mesh_area* mesh;
+	int modidx;
+	if(mstate == NULL)
+		return;
+	mesh = mstate->s.env->mesh;
+	if(!mstate->replies_sent) {
+		/* drop unsent replies */
+		struct mesh_reply* rep;
+		struct mesh_cb* cb;
+		rep = mstate->reply_list;
+		while(rep != NULL) {
+			comm_point_drop_reply(&rep->query_reply);
+			mesh->num_reply_addrs--;
+			rep = rep->next;
+		}
+		/* callbacks hear about the failure */
+		cb = mstate->cb_list;
+		while(cb != NULL) {
+			fptr_ok(fptr_whitelist_mesh_cb(cb->cb));
+			(*cb->cb)(cb->cb_arg, LDNS_RCODE_SERVFAIL, NULL,
+				sec_status_unchecked, NULL);
+			mesh->num_reply_addrs--;
+			cb = cb->next;
+		}
+	}
+
+	/* de-init modules */
+	for(modidx = 0; modidx < mesh->mods.num; modidx++) {
+		fptr_ok(fptr_whitelist_mod_clear(mesh->mods.mod[modidx]->clear));
+		(*mesh->mods.mod[modidx]->clear)(&mstate->s, modidx);
+		mstate->s.minfo[modidx] = NULL;
+		mstate->s.ext_state[modidx] = module_finished;
+	}
+	/* the state itself lives in this region */
+	alloc_reg_release(mstate->s.env->alloc, mstate->s.region);
+}
+
+/**
+ * Remove a mesh state from the mesh and release it.
+ * Detaches its subqueries, unlinks it from the forever or jostle list,
+ * adjusts the detached and reply state counters, removes the references
+ * to it from the sub_set of its super states, deletes it from the run
+ * and all trees and finally calls mesh_state_cleanup().
+ * @param qstate: module query state of the mesh state; NULL is a no-op.
+ */
+void
+mesh_state_delete(struct module_qstate* qstate)
+{
+ struct mesh_area* mesh;
+ struct mesh_state_ref* super, ref;
+ struct mesh_state* mstate;
+ if(!qstate)
+ return;
+ mstate = qstate->mesh_info;
+ mesh = mstate->s.env->mesh;
+ /* let go of the subqueries before this state disappears */
+ mesh_detach_subs(&mstate->s);
+ /* only the forever list has a counter to keep up to date */
+ if(mstate->list_select == mesh_forever_list) {
+ mesh->num_forever_states --;
+ mesh_list_remove(mstate, &mesh->forever_first,
+ &mesh->forever_last);
+ } else if(mstate->list_select == mesh_jostle_list) {
+ mesh_list_remove(mstate, &mesh->jostle_first,
+ &mesh->jostle_last);
+ }
+ /* no replies, no callbacks, no supers: it was counted as detached */
+ if(!mstate->reply_list && !mstate->cb_list
+ && mstate->super_set.count == 0) {
+ log_assert(mesh->num_detached_states > 0);
+ mesh->num_detached_states--;
+ }
+ /* it was counted as a reply state if it has a reply or callback */
+ if(mstate->reply_list || mstate->cb_list) {
+ log_assert(mesh->num_reply_states > 0);
+ mesh->num_reply_states--;
+ }
+ /* lookup key that refers to this state, used to delete the entry
+ * for it from the sub_set of every super state */
+ ref.node.key = &ref;
+ ref.s = mstate;
+ RBTREE_FOR(super, struct mesh_state_ref*, &mstate->super_set) {
+ (void)rbtree_delete(&super->s->sub_set, &ref);
+ }
+ (void)rbtree_delete(&mesh->run, mstate);
+ (void)rbtree_delete(&mesh->all, mstate);
+ mesh_state_cleanup(mstate);
+}
+
+/**
+ * Recursive helper: search the subqueries of m, and their subqueries,
+ * for tofind. The counter c is incremented on every call; once it has
+ * passed MESH_MAX_SUBSUB the search stops and reports a hit.
+ * @return 1 if found or if the counter limit was exceeded, else 0.
+ */
+static int
+find_in_subsub(struct mesh_state* m, struct mesh_state* tofind, size_t *c)
+{
+	struct mesh_state_ref* child;
+	size_t visited = *c;
+	*c = visited + 1;
+	if(visited > MESH_MAX_SUBSUB)
+		return 1;
+	RBTREE_FOR(child, struct mesh_state_ref*, &m->sub_set) {
+		if(child->s == tofind)
+			return 1;
+		if(find_in_subsub(child->s, tofind, c))
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * Find a cycle for an already looked up mesh_state.
+ * @param qstate: query state that wants to depend on dep_m.
+ * @param dep_m: the looked up state, may be NULL.
+ * @return 0 if dep_m is NULL or no cycle was found, 1 if dep_m is the
+ *	state of qstate or has it below it, 2 if the search counter went
+ *	over MESH_MAX_SUBSUB.
+ */
+static int
+mesh_detect_cycle_found(struct module_qstate* qstate, struct mesh_state* dep_m)
+{
+	struct mesh_state* cyc_m = qstate->mesh_info;
+	size_t counter = 0;
+	if(dep_m == NULL)
+		return 0;
+	if(dep_m != cyc_m && !find_in_subsub(dep_m, cyc_m, &counter))
+		return 0;
+	return (counter > MESH_MAX_SUBSUB) ? 2 : 1;
+}
+
+/**
+ * Detach all subqueries from a query state.
+ * The reference to this state is deleted from the super_set of every
+ * subquery; a subquery left without replies, callbacks and supers is
+ * counted as detached. The sub_set is then reinitialised to empty.
+ */
+void mesh_detach_subs(struct module_qstate* qstate)
+{
+	struct mesh_area* mesh = qstate->env->mesh;
+	struct mesh_state_ref* subref;
+	struct mesh_state_ref self;
+#ifdef UNBOUND_DEBUG
+	struct rbnode_t* n;
+#endif
+	/* key to find the entry for this state in a super_set */
+	self.node.key = &self;
+	self.s = qstate->mesh_info;
+	RBTREE_FOR(subref, struct mesh_state_ref*,
+		&qstate->mesh_info->sub_set) {
+#ifdef UNBOUND_DEBUG
+		n =
+#else
+		(void)
+#endif
+		rbtree_delete(&subref->s->super_set, &self);
+		log_assert(n != NULL); /* must have been present */
+		if(subref->s->reply_list || subref->s->cb_list)
+			continue;
+		if(subref->s->super_set.count != 0)
+			continue;
+		/* nobody is interested in this subquery any more */
+		mesh->num_detached_states++;
+		log_assert(mesh->num_detached_states +
+			mesh->num_reply_states <= mesh->all.count);
+	}
+	rbtree_init(&qstate->mesh_info->sub_set, &mesh_state_ref_compare);
+}
+
+/**
+ * Make the query (qinfo, qflags, prime) a subquery of qstate.
+ * An existing mesh state for that query is reused; otherwise a new one
+ * is created, inserted in the all and run trees and returned in *newq.
+ * @param qstate: the query state that becomes the super state.
+ * @param qinfo: query for the sub state.
+ * @param qflags: flags for the sub state.
+ * @param prime: priming flag for the sub state.
+ * @param newq: set to the module qstate of a newly created sub state,
+ *	or to NULL if an existing state was used. Not written when a
+ *	cycle is detected or creation fails.
+ * @return 0 on a detected cycle or allocation failure, 1 otherwise.
+ */
+int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo,
+ uint16_t qflags, int prime, struct module_qstate** newq)
+{
+ /* find it, if not, create it */
+ struct mesh_area* mesh = qstate->env->mesh;
+ struct mesh_state* sub = mesh_area_find(mesh, qinfo, qflags, prime);
+ int was_detached;
+ /* a NULL sub is never a cycle, see mesh_detect_cycle_found */
+ if(mesh_detect_cycle_found(qstate, sub)) {
+ verbose(VERB_ALGO, "attach failed, cycle detected");
+ return 0;
+ }
+ if(!sub) {
+#ifdef UNBOUND_DEBUG
+ struct rbnode_t* n;
+#endif
+ /* create a new one */
+ sub = mesh_state_create(qstate->env, qinfo, qflags, prime);
+ if(!sub) {
+ log_err("mesh_attach_sub: out of memory");
+ return 0;
+ }
+#ifdef UNBOUND_DEBUG
+ n =
+#else
+ (void)
+#endif
+ rbtree_insert(&mesh->all, &sub->node);
+ log_assert(n != NULL);
+ /* set detached (it is now) */
+ mesh->num_detached_states++;
+ /* set new query state to run */
+#ifdef UNBOUND_DEBUG
+ n =
+#else
+ (void)
+#endif
+ rbtree_insert(&mesh->run, &sub->run_node);
+ log_assert(n != NULL);
+ *newq = &sub->s;
+ } else
+ *newq = NULL;
+ /* sample before attaching, the attachment changes the count */
+ was_detached = (sub->super_set.count == 0);
+ /* NOTE(review): on failure here a sub created above stays in the
+ * all and run trees and *newq still points at it */
+ if(!mesh_state_attachment(qstate->mesh_info, sub))
+ return 0;
+ /* if it was a duplicate attachment, the count was not zero before */
+ if(!sub->reply_list && !sub->cb_list && was_detached &&
+ sub->super_set.count == 1) {
+ /* it used to be detached, before this one got added */
+ log_assert(mesh->num_detached_states > 0);
+ mesh->num_detached_states--;
+ }
+ /* *newq will be run when inited after the current module stops */
+ return 1;
+}
+
+/**
+ * Record that sub is a subquery of super, in both directions.
+ * A reference to sub is allocated in the region of super and a
+ * reference to super in the region of sub.
+ * @return 0 if a reference could not be allocated, 1 otherwise; this
+ *	includes the case where the attachment already existed.
+ */
+int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub)
+{
+#ifdef UNBOUND_DEBUG
+	struct rbnode_t* n;
+#endif
+	struct mesh_state_ref* subref; /* points to sub, inserted in super */
+	struct mesh_state_ref* superref; /* points to super, inserted in sub */
+	subref = regional_alloc(super->s.region,
+		sizeof(struct mesh_state_ref));
+	if(subref == NULL) {
+		log_err("mesh_state_attachment: out of memory");
+		return 0;
+	}
+	superref = regional_alloc(sub->s.region,
+		sizeof(struct mesh_state_ref));
+	if(superref == NULL) {
+		log_err("mesh_state_attachment: out of memory");
+		return 0;
+	}
+	subref->node.key = subref;
+	subref->s = sub;
+	superref->node.key = superref;
+	superref->s = super;
+	if(rbtree_insert(&sub->super_set, &superref->node) == NULL) {
+		/* this should not happen, iterator and validator do not
+		 * attach subqueries that are identical. */
+		/* already attached, nothing more to do. both references
+		 * were allocated in a region and cannot be freed */
+		return 1;
+	}
+#ifdef UNBOUND_DEBUG
+	n =
+#else
+	(void)
+#endif
+	rbtree_insert(&super->sub_set, &subref->node);
+	log_assert(n != NULL); /* the insert above succeeded, so the reverse
+		administration should not fail now, unless out of sync */
+	return 1;
+}
+
+/**
+ * callback results to mesh cb entry
+ * The callback is called exactly once on every path through this
+ * function, and the count of waiting reply addresses is decremented.
+ * @param m: mesh state to send it for.
+ * @param rcode: if not 0, error code.
+ * @param rep: reply to send (or NULL if rcode is set).
+ * If NULL while rcode is 0, SERVFAIL is used.
+ * @param r: callback entry
+ */
+static void
+mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep,
+ struct mesh_cb* r)
+{
+ int secure;
+ char* reason = NULL;
+ /* bogus messages are not made into servfail, sec_status passed
+ * to the callback function */
+ if(rep && rep->security == sec_status_secure)
+ secure = 1;
+ else secure = 0;
+ if(!rep && rcode == LDNS_RCODE_NOERROR)
+ rcode = LDNS_RCODE_SERVFAIL;
+ /* rep is not NULL when rcode is 0 here, because of the line above.
+ * for a bogus reply get the reason string; if that fails the
+ * callback gets SERVFAIL instead */
+ if(!rcode && rep->security == sec_status_bogus) {
+ if(!(reason = errinf_to_str(&m->s)))
+ rcode = LDNS_RCODE_SERVFAIL;
+ }
+ /* send the reply */
+ if(rcode) {
+ fptr_ok(fptr_whitelist_mesh_cb(r->cb));
+ (*r->cb)(r->cb_arg, rcode, r->buf, sec_status_unchecked, NULL);
+ } else {
+ /* keep the udp size stored in the entry for the encode call,
+ * the edns fields are overwritten for the answer */
+ size_t udp_size = r->edns.udp_size;
+ sldns_buffer_clear(r->buf);
+ r->edns.edns_version = EDNS_ADVERTISED_VERSION;
+ r->edns.udp_size = EDNS_ADVERTISED_SIZE;
+ r->edns.ext_rcode = 0;
+ r->edns.bits &= EDNS_DO;
+ if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid,
+ r->qflags, r->buf, 0, 1,
+ m->s.env->scratch, udp_size, &r->edns,
+ (int)(r->edns.bits & EDNS_DO), secure))
+ {
+ /* encoding failed, report SERVFAIL */
+ fptr_ok(fptr_whitelist_mesh_cb(r->cb));
+ (*r->cb)(r->cb_arg, LDNS_RCODE_SERVFAIL, r->buf,
+ sec_status_unchecked, NULL);
+ } else {
+ fptr_ok(fptr_whitelist_mesh_cb(r->cb));
+ (*r->cb)(r->cb_arg, LDNS_RCODE_NOERROR, r->buf,
+ rep->security, reason);
+ }
+ }
+ /* reason is NULL unless errinf_to_str returned a string */
+ free(reason);
+ m->s.env->mesh->num_reply_addrs--;
+}
+
+/**
+ * Send reply to mesh reply entry
+ * Also updates the reply counters, the wait time sum and histogram,
+ * and, with stat_extended configured, the per-rcode statistics.
+ * @param m: mesh state to send it for.
+ * @param rcode: if not 0, error code.
+ * @param rep: reply to send (or NULL if rcode is set).
+ * If NULL while rcode is 0, SERVFAIL is used.
+ * @param r: reply entry
+ * @param prev: previous reply, already has its answer encoded in buffer.
+ * NULL if there is no previous reply.
+ */
+static void
+mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep,
+ struct mesh_reply* r, struct mesh_reply* prev)
+{
+ struct timeval end_time;
+ struct timeval duration;
+ int secure;
+ /* examine security status */
+ /* when validating, and the CD flag is off or ignored by config,
+ * a reply with security at or below bogus becomes SERVFAIL */
+ if(m->s.env->need_to_validate && (!(r->qflags&BIT_CD) ||
+ m->s.env->cfg->ignore_cd) && rep &&
+ rep->security <= sec_status_bogus) {
+ rcode = LDNS_RCODE_SERVFAIL;
+ if(m->s.env->cfg->stat_extended)
+ m->s.env->mesh->ans_bogus++;
+ }
+ if(rep && rep->security == sec_status_secure)
+ secure = 1;
+ else secure = 0;
+ if(!rep && rcode == LDNS_RCODE_NOERROR)
+ rcode = LDNS_RCODE_SERVFAIL;
+ /* send the reply */
+ /* the reuse of the previous answer is tried first, before rcode
+ * is looked at: same flags and same edns settings are required */
+ if(prev && prev->qflags == r->qflags &&
+ prev->edns.edns_present == r->edns.edns_present &&
+ prev->edns.bits == r->edns.bits &&
+ prev->edns.udp_size == r->edns.udp_size) {
+ /* if the previous reply is identical to this one, fix ID */
+ if(prev->query_reply.c->buffer != r->query_reply.c->buffer)
+ sldns_buffer_copy(r->query_reply.c->buffer,
+ prev->query_reply.c->buffer);
+ /* the query id of this client goes in at offset 0 */
+ sldns_buffer_write_at(r->query_reply.c->buffer, 0,
+ &r->qid, sizeof(uint16_t));
+ /* and the query name stored for this client at offset 12 */
+ sldns_buffer_write_at(r->query_reply.c->buffer, 12,
+ r->qname, m->s.qinfo.qname_len);
+ comm_point_send_reply(&r->query_reply);
+ } else if(rcode) {
+ /* the qname of the mesh state is replaced by the name stored
+ * with this reply entry before encoding */
+ m->s.qinfo.qname = r->qname;
+ error_encode(r->query_reply.c->buffer, rcode, &m->s.qinfo,
+ r->qid, r->qflags, &r->edns);
+ comm_point_send_reply(&r->query_reply);
+ } else {
+ /* keep the udp size stored in the entry for the encode call,
+ * the edns fields are overwritten for the answer */
+ size_t udp_size = r->edns.udp_size;
+ r->edns.edns_version = EDNS_ADVERTISED_VERSION;
+ r->edns.udp_size = EDNS_ADVERTISED_SIZE;
+ r->edns.ext_rcode = 0;
+ r->edns.bits &= EDNS_DO;
+ m->s.qinfo.qname = r->qname;
+ if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid,
+ r->qflags, r->query_reply.c->buffer, 0, 1,
+ m->s.env->scratch, udp_size, &r->edns,
+ (int)(r->edns.bits & EDNS_DO), secure))
+ {
+ /* encoding failed, send SERVFAIL instead */
+ error_encode(r->query_reply.c->buffer,
+ LDNS_RCODE_SERVFAIL, &m->s.qinfo, r->qid,
+ r->qflags, &r->edns);
+ }
+ comm_point_send_reply(&r->query_reply);
+ }
+ /* account */
+ m->s.env->mesh->num_reply_addrs--;
+ /* wait time is measured from the start_time of the reply entry */
+ end_time = *m->s.env->now_tv;
+ timeval_subtract(&duration, &end_time, &r->start_time);
+ verbose(VERB_ALGO, "query took " ARG_LL "d.%6.6d sec",
+ (long long)duration.tv_sec, (int)duration.tv_usec);
+ m->s.env->mesh->replies_sent++;
+ timeval_add(&m->s.env->mesh->replies_sum_wait, &duration);
+ timehist_insert(m->s.env->mesh->histogram, &duration);
+ if(m->s.env->cfg->stat_extended) {
+ /* the rcode is read back from the flags in the reply buffer */
+ uint16_t rc = FLAGS_GET_RCODE(sldns_buffer_read_u16_at(r->
+ query_reply.c->buffer, 2));
+ if(secure) m->s.env->mesh->ans_secure++;
+ m->s.env->mesh->ans_rcode[ rc ] ++;
+ /* rcode 0 with an answer count of 0 is counted as nodata */
+ if(rc == 0 && LDNS_ANCOUNT(sldns_buffer_begin(r->
+ query_reply.c->buffer)) == 0)
+ m->s.env->mesh->ans_nodata++;
+ }
+}
+
+/**
+ * Deliver the result of a finished mesh state.
+ * Every entry on the reply list is sent, each one getting the entry
+ * before it as the previous reply, replies_sent is set, and then every
+ * entry on the callback list is called.
+ */
+void mesh_query_done(struct mesh_state* mstate)
+{
+	struct mesh_reply* cur;
+	struct mesh_reply* prev = NULL;
+	struct mesh_cb* cb;
+	struct reply_info* rep = NULL;
+	if(mstate->s.return_msg)
+		rep = mstate->s.return_msg->rep;
+	cur = mstate->reply_list;
+	while(cur != NULL) {
+		mesh_send_reply(mstate, mstate->s.return_rcode, rep, cur,
+			prev);
+		prev = cur;
+		cur = cur->next;
+	}
+	mstate->replies_sent = 1;
+	cb = mstate->cb_list;
+	while(cb != NULL) {
+		mesh_do_callback(mstate, mstate->s.return_rcode, rep, cb);
+		cb = cb->next;
+	}
+}
+
+/**
+ * Tell every super state of mstate about its result.
+ * Each super state is inserted in the run tree and the inform_super
+ * function of the module it is currently at is called.
+ */
+void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate)
+{
+	struct mesh_state_ref* ref;
+	RBTREE_FOR(ref, struct mesh_state_ref*, &mstate->super_set)
+	{
+		struct mesh_state* super = ref->s;
+		/* make super runnable */
+		(void)rbtree_insert(&mesh->run, &super->run_node);
+		/* callback the function to inform super of result */
+		fptr_ok(fptr_whitelist_mod_inform_super(
+			mesh->mods.mod[super->s.curmod]->inform_super));
+		(*mesh->mods.mod[super->s.curmod]->inform_super)(&mstate->s,
+			super->s.curmod, &super->s);
+	}
+}
+
+/**
+ * Look up a mesh state in the tree of all states.
+ * The search key is filled with the priming flag, the query info and
+ * the query flags exactly as passed in.
+ * @return the result of the tree search for that key.
+ */
+struct mesh_state* mesh_area_find(struct mesh_area* mesh,
+	struct query_info* qinfo, uint16_t qflags, int prime)
+{
+	struct mesh_state key;
+
+	key.node.key = &key;
+	key.s.qinfo = *qinfo;
+	key.s.query_flags = qflags;
+	key.s.is_priming = prime;
+
+	return (struct mesh_state*)rbtree_search(&mesh->all, &key);
+}
+
+/**
+ * Put a callback entry at the front of the callback list of a state.
+ * The entry is allocated in the region of the state and holds a copy
+ * of the edns data.
+ * @return 1 on success, 0 if the entry could not be allocated.
+ */
+int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns,
+	sldns_buffer* buf, mesh_cb_func_t cb, void* cb_arg,
+	uint16_t qid, uint16_t qflags)
+{
+	struct mesh_cb* entry = regional_alloc(s->s.region,
+		sizeof(struct mesh_cb));
+	if(entry == NULL)
+		return 0;
+	log_assert(fptr_whitelist_mesh_cb(cb)); /* early failure if missing */
+	entry->cb = cb;
+	entry->cb_arg = cb_arg;
+	entry->buf = buf;
+	entry->edns = *edns;
+	entry->qid = qid;
+	entry->qflags = qflags;
+	/* link in at the head */
+	entry->next = s->cb_list;
+	s->cb_list = entry;
+	return 1;
+}
+
+/**
+ * Put a reply entry at the front of the reply list of a state.
+ * The entry and a copy of qname (qname_len bytes of the state) are
+ * allocated in the region of the state; the entry holds copies of the
+ * comm_reply and the edns data, and the current time as start time.
+ * @return 1 on success, 0 if an allocation failed; the list is not
+ *	changed in that case.
+ */
+int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns,
+	struct comm_reply* rep, uint16_t qid, uint16_t qflags, uint8_t* qname)
+{
+	struct mesh_reply* entry = regional_alloc(s->s.region,
+		sizeof(struct mesh_reply));
+	if(entry == NULL)
+		return 0;
+	entry->qname = regional_alloc_init(s->s.region, qname,
+		s->s.qinfo.qname_len);
+	if(entry->qname == NULL)
+		return 0;
+	entry->query_reply = *rep;
+	entry->edns = *edns;
+	entry->qid = qid;
+	entry->qflags = qflags;
+	entry->start_time = *s->s.env->now_tv;
+	/* link in at the head */
+	entry->next = s->reply_list;
+	s->reply_list = entry;
+	return 1;
+}
+
+/**
+ * Continue processing the mesh state at another module.
+ * Handles module to module transfer of control.
+ * Handles module finished.
+ * @param mesh: the mesh area.
+ * @param mstate: currently active mesh state.
+ * Deleted if finished, calls _done and _supers to
+ * send replies to clients and inform other mesh states.
+ * This in turn may create additional runnable mesh states.
+ * @param s: state at which the current module exited.
+ * @param ev: the event sent to the module.
+ * returned is the event to send to the next module.
+ * @return true if continue processing at the new module.
+ * false if not continued processing is needed.
+ */
+static int
+mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate,
+ enum module_ext_state s, enum module_ev* ev)
+{
+ /* every call counts as an activation, the recursive call below too */
+ mstate->num_activated++;
+ if(mstate->num_activated > MESH_MAX_ACTIVATION) {
+ /* module is looping. Stop it. */
+ log_err("internal error: looping module stopped");
+ log_query_info(VERB_QUERY, "pass error for qstate",
+ &mstate->s.qinfo);
+ s = module_error;
+ }
+ if(s == module_wait_module || s == module_restart_next) {
+ /* start next module */
+ mstate->s.curmod++;
+ if(mesh->mods.num == mstate->s.curmod) {
+ log_err("Cannot pass to next module; at last module");
+ log_query_info(VERB_QUERY, "pass error for qstate",
+ &mstate->s.qinfo);
+ /* step back and handle it as an error at this module */
+ mstate->s.curmod--;
+ return mesh_continue(mesh, mstate, module_error, ev);
+ }
+ if(s == module_restart_next) {
+ /* clear the state of the next module before it runs */
+ fptr_ok(fptr_whitelist_mod_clear(
+ mesh->mods.mod[mstate->s.curmod]->clear));
+ (*mesh->mods.mod[mstate->s.curmod]->clear)
+ (&mstate->s, mstate->s.curmod);
+ mstate->s.minfo[mstate->s.curmod] = NULL;
+ }
+ *ev = module_event_pass;
+ return 1;
+ }
+ if(s == module_error && mstate->s.return_rcode == LDNS_RCODE_NOERROR) {
+ /* error is bad, handle pass back up below */
+ mstate->s.return_rcode = LDNS_RCODE_SERVFAIL;
+ }
+ if(s == module_error || s == module_finished) {
+ if(mstate->s.curmod == 0) {
+ /* the first module is done: answer the clients, inform
+ * the super states, and remove the state. mstate must
+ * not be used by the caller after this */
+ mesh_query_done(mstate);
+ mesh_walk_supers(mesh, mstate);
+ mesh_state_delete(&mstate->s);
+ return 0;
+ }
+ /* pass along the locus of control */
+ mstate->s.curmod --;
+ *ev = module_event_moddone;
+ return 1;
+ }
+ /* any other exit state: nothing more to do for this state now */
+ return 0;
+}
+
+/**
+ * Run the modules for a mesh state, and then for the states in the run
+ * tree, until that tree is empty.
+ * @param mesh: the mesh area.
+ * @param mstate: state to start with; nothing is run if NULL.
+ * @param ev: event for the first call of the current module of mstate.
+ * @param e: outbound entry passed along with that first call; later
+ * calls get NULL.
+ */
+void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate,
+ enum module_ev ev, struct outbound_entry* e)
+{
+ enum module_ext_state s;
+ verbose(VERB_ALGO, "mesh_run: start");
+ while(mstate) {
+ /* run the module */
+ fptr_ok(fptr_whitelist_mod_operate(
+ mesh->mods.mod[mstate->s.curmod]->operate));
+ (*mesh->mods.mod[mstate->s.curmod]->operate)
+ (&mstate->s, ev, mstate->s.curmod, e);
+
+ /* examine results */
+ mstate->s.reply = NULL;
+ regional_free_all(mstate->s.env->scratch);
+ s = mstate->s.ext_state[mstate->s.curmod];
+ verbose(VERB_ALGO, "mesh_run: %s module exit state is %s",
+ mesh->mods.mod[mstate->s.curmod]->name, strextstate(s));
+ e = NULL;
+ /* on true, the same state goes on at another module with the
+ * event that mesh_continue stored in ev */
+ if(mesh_continue(mesh, mstate, s, &ev))
+ continue;
+
+ /* run more modules */
+ /* mstate may have been deleted by mesh_continue; it is not
+ * dereferenced again, only replaced or used as a delete key */
+ ev = module_event_pass;
+ if(mesh->run.count > 0) {
+ /* pop random element off the runnable tree */
+ mstate = (struct mesh_state*)mesh->run.root->key;
+ (void)rbtree_delete(&mesh->run, mstate);
+ } else mstate = NULL;
+ }
+ if(verbosity >= VERB_ALGO) {
+ mesh_stats(mesh, "mesh_run: end");
+ mesh_log_list(mesh);
+ }
+}
+
+/**
+ * Log one line per mesh state in the tree of all states.
+ * The label has a running number followed by markers: "p" priming,
+ * "RD" and "CD" query flags, "d" no super states, "c" has sub states,
+ * the current module number, "rep" has replies, "cb" has callbacks.
+ */
+void
+mesh_log_list(struct mesh_area* mesh)
+{
+	char label[30];
+	struct mesh_state* m;
+	int idx = 0;
+	RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
+		const char* prime = (m->s.is_priming) ? "p" : "";
+		const char* rd = (m->s.query_flags&BIT_RD) ? "RD" : "";
+		const char* cd = (m->s.query_flags&BIT_CD) ? "CD" : "";
+		const char* detached = (m->super_set.count==0) ? "d" : "";
+		const char* children = (m->sub_set.count!=0) ? "c" : "";
+		const char* hasreply = (m->reply_list) ? "rep" : "";
+		const char* hascb = (m->cb_list) ? "cb" : "";
+		snprintf(label, sizeof(label), "%d%s%s%s%s%s mod%d %s%s",
+			idx, prime, rd, cd, detached, children,
+			m->s.curmod, hasreply, hascb);
+		idx++;
+		log_query_info(VERB_ALGO, label, &m->s.qinfo);
+	}
+}
+
+/**
+ * Log the mesh statistics.
+ * One line with the state and reply counters is logged at VERB_DETAIL.
+ * If any replies were sent, the average wait time and the histogram of
+ * wait times are logged as well.
+ * @param mesh: the mesh area to report on.
+ * @param str: text put at the start of the counter line.
+ */
+void
+mesh_stats(struct mesh_area* mesh, const char* str)
+{
+	/* every argument is cast to unsigned, so every conversion is %u;
+	 * with %d a counter above INT_MAX would be printed negative */
+	verbose(VERB_DETAIL, "%s %u recursion states (%u with reply, "
+		"%u detached), %u waiting replies, %u recursion replies "
+		"sent, %u replies dropped, %u states jostled out",
+		str, (unsigned)mesh->all.count,
+		(unsigned)mesh->num_reply_states,
+		(unsigned)mesh->num_detached_states,
+		(unsigned)mesh->num_reply_addrs,
+		(unsigned)mesh->replies_sent,
+		(unsigned)mesh->stats_dropped,
+		(unsigned)mesh->stats_jostled);
+	if(mesh->replies_sent > 0) {
+		/* average is the summed wait time over the replies sent */
+		struct timeval avg;
+		timeval_divide(&avg, &mesh->replies_sum_wait,
+			mesh->replies_sent);
+		log_info("average recursion processing time "
+			ARG_LL "d.%6.6d sec",
+			(long long)avg.tv_sec, (int)avg.tv_usec);
+		log_info("histogram of recursion processing times");
+		timehist_log(mesh->histogram, "recursions");
+	}
+}
+
+/**
+ * Reset the statistics counters of the mesh: replies sent, summed wait
+ * time, jostled and dropped counts, the histogram, and the secure,
+ * bogus, per-rcode and nodata answer counts.
+ * @param mesh: the mesh area; NULL is a no-op.
+ */
+void
+mesh_stats_clear(struct mesh_area* mesh)
+{
+	if(mesh == NULL)
+		return;
+	/* reply counts and timing */
+	mesh->replies_sent = 0;
+	mesh->replies_sum_wait.tv_sec = 0;
+	mesh->replies_sum_wait.tv_usec = 0;
+	mesh->stats_jostled = 0;
+	mesh->stats_dropped = 0;
+	timehist_clear(mesh->histogram);
+	/* answer classification */
+	mesh->ans_secure = 0;
+	mesh->ans_bogus = 0;
+	memset(&mesh->ans_rcode[0], 0, sizeof(size_t)*16);
+	mesh->ans_nodata = 0;
+}
+
+/**
+ * Add up the memory in use by the mesh: the mesh structure, the
+ * histogram with its buckets, the qbuf_bak buffer with its capacity,
+ * and the region of every mesh state.
+ * @return the total in bytes.
+ */
+size_t
+mesh_get_mem(struct mesh_area* mesh)
+{
+	struct mesh_state* m;
+	size_t total = sizeof(*mesh);
+	total += sizeof(struct timehist);
+	total += sizeof(struct th_buck)*mesh->histogram->num;
+	total += sizeof(sldns_buffer);
+	total += sldns_buffer_capacity(mesh->qbuf_bak);
+	RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
+		/* all, including m itself allocated in qstate region */
+		total += regional_get_mem(m->s.region);
+	}
+	return total;
+}
+
+/**
+ * Check if making the query (qinfo, flags, prime) a dependency of
+ * qstate would create a cycle. The query is looked up in the mesh and
+ * the result is what mesh_detect_cycle_found returns for that state.
+ */
+int
+mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo,
+	uint16_t flags, int prime)
+{
+	return mesh_detect_cycle_found(qstate,
+		mesh_area_find(qstate->env->mesh, qinfo, flags, prime));
+}
+
+/**
+ * Append a mesh state to a doubly linked list.
+ * @param m: state to append.
+ * @param fp: pointer to the first-element pointer of the list.
+ * @param lp: pointer to the last-element pointer of the list.
+ */
+void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp,
+	struct mesh_state** lp)
+{
+	struct mesh_state* tail = *lp;
+	/* insert as last element */
+	m->prev = tail;
+	m->next = NULL;
+	if(tail == NULL)
+		*fp = m; /* list was empty, m is also the first */
+	else
+		tail->next = m;
+	*lp = m;
+}
+
+/**
+ * Unlink a mesh state from a doubly linked list.
+ * The prev and next pointers of m itself are left as they are.
+ * @param m: state to unlink.
+ * @param fp: pointer to the first-element pointer of the list.
+ * @param lp: pointer to the last-element pointer of the list.
+ */
+void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp,
+	struct mesh_state** lp)
+{
+	struct mesh_state* before = m->prev;
+	struct mesh_state* after = m->next;
+	if(after)
+		after->prev = before;
+	else
+		*lp = before; /* m was the last element */
+	if(before)
+		before->next = after;
+	else
+		*fp = after; /* m was the first element */
+}
diff --git a/external/unbound/services/mesh.h b/external/unbound/services/mesh.h
new file mode 100644
index 000000000..fbfbbcb4a
--- /dev/null
+++ b/external/unbound/services/mesh.h
@@ -0,0 +1,572 @@
+/*
+ * services/mesh.h - deal with mesh of query states and handle events for that.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains functions to assist in dealing with a mesh of
+ * query states. This mesh is supposed to be thread-specific.
+ * It consists of query states (per qname, qtype, qclass) and connections
+ * between query states and the super and subquery states, and replies to
+ * send back to clients.
+ */
+
+#ifndef SERVICES_MESH_H
+#define SERVICES_MESH_H
+
+#include "util/rbtree.h"
+#include "util/netevent.h"
+#include "util/data/msgparse.h"
+#include "util/module.h"
+#include "services/modstack.h"
+struct sldns_buffer;
+struct mesh_state;
+struct mesh_reply;
+struct mesh_cb;
+struct query_info;
+struct reply_info;
+struct outbound_entry;
+struct timehist;
+
+/**
+ * Maximum number of mesh state activations. Any more is likely an
+ * infinite loop in the module. It is then terminated.
+ */
+#define MESH_MAX_ACTIVATION 3000
+
+/**
+ * Max number of references-to-references-to-references.. search size.
+ * Any more is treated like 'too large', and the creation of a new
+ * dependency is failed (so that no loops can be created).
+ */
+#define MESH_MAX_SUBSUB 1024
+
+/**
+ * Mesh of query states
+ */
+struct mesh_area {
+ /** active module stack */
+ struct module_stack mods;
+ /** environment for new states */
+ struct module_env* env;
+
+ /** set of runnable queries (mesh_state.run_node) */
+ rbtree_t run;
+ /** rbtree of all current queries (mesh_state.node)*/
+ rbtree_t all;
+
+ /** count of the total number of mesh_reply entries */
+ size_t num_reply_addrs;
+ /** count of the number of mesh_states that have mesh_replies
+ * Because a state can send results to multiple reply addresses,
+ * this number must be equal or lower than num_reply_addrs. */
+ size_t num_reply_states;
+ /** number of mesh_states that have no mesh_replies, and also
+ * an empty set of super-states, thus are 'toplevel' or detached
+ * internal opportunistic queries */
+ size_t num_detached_states;
+ /** number of reply states in the forever list */
+ size_t num_forever_states;
+
+ /** max total number of reply states to have */
+ size_t max_reply_states;
+ /** max forever number of reply states to have */
+ size_t max_forever_states;
+
+ /** stats, cumulative number of reply states jostled out */
+ size_t stats_jostled;
+ /** stats, cumulative number of incoming client msgs dropped */
+ size_t stats_dropped;
+ /** number of replies sent */
+ size_t replies_sent;
+ /** sum of waiting times for the replies */
+ struct timeval replies_sum_wait;
+ /** histogram of time values */
+ struct timehist* histogram;
+ /** (extended stats) secure replies */
+ size_t ans_secure;
+ /** (extended stats) bogus replies */
+ size_t ans_bogus;
+ /** (extended stats) rcodes in replies */
+ size_t ans_rcode[16];
+ /** (extended stats) rcode nodata in replies */
+ size_t ans_nodata;
+
+ /** backup of query if other operations recurse and need the
+ * network buffers */
+ struct sldns_buffer* qbuf_bak;
+
+ /** double linked list of the run-to-completion query states.
+ * These are query states with a reply */
+ struct mesh_state* forever_first;
+ /** last entry in run forever list */
+ struct mesh_state* forever_last;
+
+ /** double linked list of the query states that can be jostled out
+ * by new queries if too old. These are query states with a reply */
+ struct mesh_state* jostle_first;
+ /** last entry in jostle list - this is the entry that is newest */
+ struct mesh_state* jostle_last;
+ /** timeout for jostling. if age is lower, it does not get jostled. */
+ struct timeval jostle_max;
+};
+
+/**
+ * A mesh query state
+ * Unique per qname, qtype, qclass (from the qstate).
+ * And RD / CD flag; in case a client turns it off.
+ * And priming queries are different from ordinary queries (because of hints).
+ *
+ * The entire structure is allocated in a region, this region is the qstate
+ * region. All parts (rbtree nodes etc) are also allocated in the region.
+ */
+struct mesh_state {
+ /** node in mesh_area all tree, key is this struct. Must be first. */
+ rbnode_t node;
+ /** node in mesh_area runnable tree, key is this struct */
+ rbnode_t run_node;
+ /** the query state. Note that the qinfo and query_flags
+ * may not change. */
+ struct module_qstate s;
+ /** the list of replies to clients for the results */
+ struct mesh_reply* reply_list;
+ /** the list of callbacks for the results */
+ struct mesh_cb* cb_list;
+ /** set of superstates (that want this state's result)
+ * contains struct mesh_state_ref* */
+ rbtree_t super_set;
+ /** set of substates (that this state needs to continue)
+ * contains struct mesh_state_ref* */
+ rbtree_t sub_set;
+ /** number of activations for the mesh state */
+ size_t num_activated;
+
+ /** previous in linked list for reply states */
+ struct mesh_state* prev;
+ /** next in linked list for reply states */
+ struct mesh_state* next;
+ /** if this state is in the forever list, jostle list, or neither */
+ enum mesh_list_select { mesh_no_list, mesh_forever_list,
+ mesh_jostle_list } list_select;
+
+ /** true if replies have been sent out (at end for alignment) */
+ uint8_t replies_sent;
+};
+
+/**
+ * Rbtree reference to a mesh_state.
+ * Used in super_set and sub_set.
+ */
+struct mesh_state_ref {
+ /** node in rbtree for set, key is this structure */
+ rbnode_t node;
+ /** the mesh state */
+ struct mesh_state* s;
+};
+
+/**
+ * Reply to a client
+ */
+struct mesh_reply {
+ /** next in reply list */
+ struct mesh_reply* next;
+ /** the query reply destination, packet buffer and where to send. */
+ struct comm_reply query_reply;
+ /** edns data from query */
+ struct edns_data edns;
+ /** the time when request was entered */
+ struct timeval start_time;
+ /** id of query, in network byteorder. */
+ uint16_t qid;
+ /** flags of query, for reply flags */
+ uint16_t qflags;
+ /** qname from this query. len same as mesh qinfo. */
+ uint8_t* qname;
+};
+
+/**
+ * Mesh result callback func.
+ * called as func(cb_arg, rcode, buffer_with_reply, security, why_bogus);
+ */
+typedef void (*mesh_cb_func_t)(void*, int, struct sldns_buffer*, enum sec_status,
+ char*);
+
+/**
+ * Callback to result routine
+ */
+struct mesh_cb {
+ /** next in list */
+ struct mesh_cb* next;
+ /** edns data from query */
+ struct edns_data edns;
+ /** id of query, in network byteorder. */
+ uint16_t qid;
+ /** flags of query, for reply flags */
+ uint16_t qflags;
+ /** buffer for reply */
+ struct sldns_buffer* buf;
+
+ /** callback routine for results. if rcode != 0 buf has message.
+ * called as cb(cb_arg, rcode, buf, sec_state, why_bogus);
+ */
+ mesh_cb_func_t cb;
+ /** user arg for callback */
+ void* cb_arg;
+};
+
+/* ------------------- Functions for worker -------------------- */
+
+/**
+ * Allocate mesh, to empty.
+ * @param stack: module stack to activate, copied (as readonly reference).
+ * @param env: environment for new queries.
+ * @return mesh: the new mesh or NULL on error.
+ */
+struct mesh_area* mesh_create(struct module_stack* stack,
+ struct module_env* env);
+
+/**
+ * Delete mesh, and all query states and replies in it.
+ * @param mesh: the mesh to delete.
+ */
+void mesh_delete(struct mesh_area* mesh);
+
+/**
+ * New query incoming from clients. Create new query state if needed, and
+ * add mesh_reply to it. Returns error to client on malloc failures.
+ * Will run the mesh area queries to process if a new query state is created.
+ *
+ * @param mesh: the mesh.
+ * @param qinfo: query from client.
+ * @param qflags: flags from client query.
+ * @param edns: edns data from client query.
+ * @param rep: where to reply to.
+ * @param qid: query id to reply with.
+ */
+void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
+ uint16_t qflags, struct edns_data* edns, struct comm_reply* rep,
+ uint16_t qid);
+
+/**
+ * New query with callback. Create new query state if needed, and
+ * add mesh_cb to it.
+ * Will run the mesh area queries to process if a new query state is created.
+ *
+ * @param mesh: the mesh.
+ * @param qinfo: query from client.
+ * @param qflags: flags from client query.
+ * @param edns: edns data from client query.
+ * @param buf: buffer for reply contents.
+ * @param qid: query id to reply with.
+ * @param cb: callback function.
+ * @param cb_arg: callback user arg.
+ * @return 0 on error.
+ */
+int mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo,
+ uint16_t qflags, struct edns_data* edns, struct sldns_buffer* buf,
+ uint16_t qid, mesh_cb_func_t cb, void* cb_arg);
+
+/**
+ * New prefetch message. Create new query state if needed.
+ * Will run the mesh area queries to process if a new query state is created.
+ *
+ * @param mesh: the mesh.
+ * @param qinfo: query from client.
+ * @param qflags: flags from client query.
+ * @param leeway: TTL leeway what to expire earlier for this update.
+ */
+void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo,
+ uint16_t qflags, time_t leeway);
+
+/**
+ * Handle new event from the wire. A serviced query has returned.
+ * The query state will be made runnable, and the mesh_area will process
+ * query states until processing is complete.
+ *
+ * @param mesh: the query mesh.
+ * @param e: outbound entry, with query state to run and reply pointer.
+ * @param reply: the comm point reply info.
+ * @param what: NETEVENT_* error code (if not 0, what is wrong, TIMEOUT).
+ */
+void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e,
+ struct comm_reply* reply, int what);
+
+/* ------------------- Functions for module environment --------------- */
+
+/**
+ * Detach-subqueries.
+ * Remove all sub-query references from this query state.
+ * Keeps super-references of those sub-queries correct.
+ * Updates stat items in mesh_area structure.
+ * @param qstate: used to find mesh state.
+ */
+void mesh_detach_subs(struct module_qstate* qstate);
+
+/**
+ * Attach subquery.
+ * Creates it if it does not exist already.
+ * Keeps sub and super references correct.
+ * Performs a cycle detection - for double check - and fails if there is one.
+ * Also fails if the sub-sub-references become too large.
+ * Updates stat items in mesh_area structure.
+ * Pass if it is priming query or not.
+ * return:
+ * o if error (malloc) happened.
+ * o need to initialise the new state (module init; it is a new state).
+ * so that the next run of the query with this module is successful.
+ * o no init needed, attachment successful.
+ *
+ * @param qstate: the state to find mesh state, and that wants to receive
+ * the results from the new subquery.
+ * @param qinfo: what to query for (copied).
+ * @param qflags: what flags to use (RD / CD flag or not).
+ * @param prime: if it is a (stub) priming query.
+ * @param newq: If the new subquery needs initialisation, it is returned,
+ * otherwise NULL is returned.
+ * @return: false on error, true if success (and init may be needed).
+ */
+int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo,
+ uint16_t qflags, int prime, struct module_qstate** newq);
+
+/**
+ * Query state is done, send messages to reply entries.
+ * Encode messages using reply entry values and the querystate (with original
+ * qinfo), using given reply_info.
+ * Pass errcode != 0 if an error reply is needed.
+ * If no reply entries, nothing is done.
+ * Must be called before a module can module_finished or return module_error.
+ * The module must handle the super query states itself as well.
+ *
+ * @param mstate: mesh state that is done. return_rcode and return_msg
+ * are used for replies.
+ * return_rcode: if not 0 (NOERROR) an error is sent back (and
+ * return_msg is ignored).
+ * return_msg: reply to encode and send back to clients.
+ */
+void mesh_query_done(struct mesh_state* mstate);
+
+/**
+ * Call inform_super for the super query states that are interested in the
+ * results from this query state. These can then be changed for error
+ * or results.
+ * Called when a module is module_finished or returns module_error.
+ * The super query states become runnable with event module_event_pass,
+ * it calls the current module for the super with the inform_super event.
+ *
+ * @param mesh: mesh area to add newly runnable modules to.
+ * @param mstate: the state that has results, used to find mesh state.
+ */
+void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate);
+
+/**
+ * Delete mesh state, cleanup and also rbtrees and so on.
+ * Will detach from all super/subnodes.
+ * @param qstate: to remove.
+ */
+void mesh_state_delete(struct module_qstate* qstate);
+
+/* ------------------- Functions for mesh -------------------- */
+
+/**
+ * Create and initialize a new mesh state and its query state
+ * Does not put the mesh state into rbtrees and so on.
+ * @param env: module environment to set.
+ * @param qinfo: query info that the mesh is for.
+ * @param qflags: flags for query (RD / CD flag).
+ * @param prime: if true, it is a priming query, set is_priming on mesh state.
+ * @return: new mesh state or NULL on allocation error.
+ */
+struct mesh_state* mesh_state_create(struct module_env* env,
+ struct query_info* qinfo, uint16_t qflags, int prime);
+
+/**
+ * Cleanup a mesh state and its query state. Does not do rbtree or
+ * reference cleanup.
+ * @param mstate: mesh state to cleanup. Its pointer may no longer be used
+ * afterwards. Cleanup rbtrees before calling this function.
+ */
+void mesh_state_cleanup(struct mesh_state* mstate);
+
+/**
+ * Delete all mesh states from the mesh.
+ * @param mesh: the mesh area to clear
+ */
+void mesh_delete_all(struct mesh_area* mesh);
+
+/**
+ * Find a mesh state in the mesh area. Pass relevant flags.
+ *
+ * @param mesh: the mesh area to look in.
+ * @param qinfo: what query
+ * @param qflags: if RD / CD bit is set or not.
+ * @param prime: if it is a priming query.
+ * @return: mesh state or NULL if not found.
+ */
+struct mesh_state* mesh_area_find(struct mesh_area* mesh,
+ struct query_info* qinfo, uint16_t qflags, int prime);
+
+/**
+ * Setup attachment super/sub relation between super and sub mesh state.
+ * The relation must not be present when calling the function.
+ * Does not update stat items in mesh_area.
+ * @param super: super state.
+ * @param sub: sub state.
+ * @return: 0 on alloc error.
+ */
+int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub);
+
+/**
+ * Create new reply structure and attach it to a mesh state.
+ * Does not update stat items in mesh area.
+ * @param s: the mesh state.
+ * @param edns: edns data for reply (bufsize).
+ * @param rep: comm point reply info.
+ * @param qid: ID of reply.
+ * @param qflags: original query flags.
+ * @param qname: original query name.
+ * @return: 0 on alloc error.
+ */
+int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns,
+ struct comm_reply* rep, uint16_t qid, uint16_t qflags, uint8_t* qname);
+
+/**
+ * Create new callback structure and attach it to a mesh state.
+ * Does not update stat items in mesh area.
+ * @param s: the mesh state.
+ * @param edns: edns data for reply (bufsize).
+ * @param buf: buffer for reply
+ * @param cb: callback to call with results.
+ * @param cb_arg: callback user arg.
+ * @param qid: ID of reply.
+ * @param qflags: original query flags.
+ * @return: 0 on alloc error.
+ */
+int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns,
+ struct sldns_buffer* buf, mesh_cb_func_t cb, void* cb_arg, uint16_t qid,
+ uint16_t qflags);
+
+/**
+ * Run the mesh. Run all runnable mesh states. Which can create new
+ * runnable mesh states. Until completion. Automatically called by
+ * mesh_report_reply and mesh_new_client as needed.
+ * @param mesh: mesh area.
+ * @param mstate: first mesh state to run.
+ * @param ev: event for the mstate. Others get event_pass.
+ * @param e: if a reply, its outbound entry.
+ */
+void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate,
+ enum module_ev ev, struct outbound_entry* e);
+
+/**
+ * Print some stats about the mesh to the log.
+ * @param mesh: the mesh to print it for.
+ * @param str: descriptive string to go with it.
+ */
+void mesh_stats(struct mesh_area* mesh, const char* str);
+
+/**
+ * Clear the stats that the mesh keeps (number of queries serviced)
+ * @param mesh: the mesh
+ */
+void mesh_stats_clear(struct mesh_area* mesh);
+
+/**
+ * Print all the states in the mesh to the log.
+ * @param mesh: the mesh to print all states of.
+ */
+void mesh_log_list(struct mesh_area* mesh);
+
+/**
+ * Calculate memory size in use by mesh and all queries inside it.
+ * @param mesh: the mesh to examine.
+ * @return size in bytes.
+ */
+size_t mesh_get_mem(struct mesh_area* mesh);
+
+/**
+ * Find cycle; see if the given mesh is in the targets sub, or sub-sub, ...
+ * trees.
+ * If the sub-sub structure is too large, it returns 'a cycle'=2.
+ * @param qstate: given mesh querystate.
+ * @param qinfo: query info for dependency.
+ * @param flags: query flags of dependency.
+ * @param prime: if dependency is a priming query or not.
+ * @return true if the name,type,class exists and the given qstate mesh exists
+ * as a dependency of that name. Thus if qstate becomes dependent on
+ * name,type,class then a cycle is created, this is return value 1.
+ * Too large to search is value 2 (also true).
+ */
+int mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo,
+ uint16_t flags, int prime);
+
+/** compare two mesh_states */
+int mesh_state_compare(const void* ap, const void* bp);
+
+/** compare two mesh references */
+int mesh_state_ref_compare(const void* ap, const void* bp);
+
+/**
+ * Make space for another recursion state for a reply in the mesh
+ * @param mesh: mesh area
+ * @param qbuf: query buffer to save if recursion is invoked to make space.
+ * This buffer is necessary, because the following sequence in calls
+ * can result in an overwrite of the incoming query:
+ * delete_other_mesh_query - iter_clean - serviced_delete - waiting
+ * udp query is sent - on error callback - callback sends SERVFAIL reply
+ * over the same network channel, and shared UDP buffer is overwritten.
+ * You can pass NULL if there is no buffer that must be backed up.
+ * @return false if no space is available.
+ */
+int mesh_make_new_space(struct mesh_area* mesh, struct sldns_buffer* qbuf);
+
+/**
+ * Insert mesh state into a double linked list. Inserted at end.
+ * @param m: mesh state.
+ * @param fp: pointer to the first-elem-pointer of the list.
+ * @param lp: pointer to the last-elem-pointer of the list.
+ */
+void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp,
+ struct mesh_state** lp);
+
+/**
+ * Remove mesh state from a double linked list. Remove from any position.
+ * @param m: mesh state.
+ * @param fp: pointer to the first-elem-pointer of the list.
+ * @param lp: pointer to the last-elem-pointer of the list.
+ */
+void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp,
+ struct mesh_state** lp);
+
+#endif /* SERVICES_MESH_H */
diff --git a/external/unbound/services/modstack.c b/external/unbound/services/modstack.c
new file mode 100644
index 000000000..a99030bc3
--- /dev/null
+++ b/external/unbound/services/modstack.c
@@ -0,0 +1,215 @@
+/*
+ * services/modstack.c - stack of modules
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains functions to help maintain a stack of modules.
+ */
+#include "config.h"
+#include <ctype.h>
+#include "services/modstack.h"
+#include "util/module.h"
+#include "util/fptr_wlist.h"
+#include "dns64/dns64.h"
+#include "iterator/iterator.h"
+#include "validator/validator.h"
+
+#ifdef WITH_PYTHONMODULE
+#include "pythonmod/pythonmod.h"
+#endif
+
+/**
+ * Count number of modules (whitespace separated words) in the string.
+ * @param s: the string, may be NULL.
+ * @return number of words, 0 for NULL and for a string without words.
+ */
+static int
+count_modules(const char* s)
+{
+	int num = 0;
+	if(!s)
+		return 0;
+	while(*s) {
+		/* skip whitespace. The cast is to unsigned char, because a
+		 * negative char value passed to isspace is undefined */
+		while(*s && isspace((unsigned char)*s))
+			s++;
+		if(*s) {
+			/* at the start of an identifier, count and skip it */
+			num++;
+			while(*s && !isspace((unsigned char)*s))
+				s++;
+		}
+	}
+	return num;
+}
+
+/** make the module stack empty: no module array and a count of zero */
+void
+modstack_init(struct module_stack* stack)
+{
+	stack->mod = NULL;
+	stack->num = 0;
+}
+
+/**
+ * Fill the module stack from the module config string.
+ * The words in the string are counted, an array of that size is allocated
+ * and module_factory() is called once for every entry.
+ * On failure the stack is left empty (num 0, and no array after an unknown
+ * module name), so that a later modstack_desetup() has nothing to walk.
+ * @param stack: the stack of modules (empty before call).
+ * @param module_conf: string what modules to insert.
+ * @return false on error: no modules, too many modules, out of memory
+ *	or a module name that module_factory() does not know.
+ */
+int
+modstack_config(struct module_stack* stack, const char* module_conf)
+{
+	int i;
+	verbose(VERB_QUERY, "module config: \"%s\"", module_conf);
+	stack->num = count_modules(module_conf);
+	if(stack->num == 0) {
+		log_err("error: no modules specified");
+		return 0;
+	}
+	if(stack->num > MAX_MODULE) {
+		log_err("error: too many modules (%d max %d)",
+			stack->num, MAX_MODULE);
+		/* nothing was allocated, do not keep the count */
+		stack->num = 0;
+		return 0;
+	}
+	stack->mod = (struct module_func_block**)calloc((size_t)
+		stack->num, sizeof(struct module_func_block*));
+	if(!stack->mod) {
+		log_err("out of memory");
+		stack->num = 0;
+		return 0;
+	}
+	for(i=0; i<stack->num; i++) {
+		stack->mod[i] = module_factory(&module_conf);
+		if(!stack->mod[i]) {
+			log_err("Unknown value for next module: '%s'",
+				module_conf);
+			/* the array is only partly filled, the remaining
+			 * NULL entries would be dereferenced by
+			 * modstack_desetup; drop the array */
+			free(stack->mod);
+			stack->mod = NULL;
+			stack->num = 0;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/**
+ * The list of module names.
+ * The order of the names matters: module_factory() uses the index of the
+ * matching name to pick the entry at the same index from
+ * module_funcs_avail().
+ * @return array of static strings, the last entry is NULL.
+ */
+const char**
+module_list_avail(void)
+{
+ /* these are the modules available */
+ static const char* names[] = {
+ "dns64",
+#ifdef WITH_PYTHONMODULE
+ "python",
+#endif
+ "validator",
+ "iterator",
+ NULL};
+ return names;
+}
+
+/** func block get function type */
+typedef struct module_func_block* (*fbgetfunctype)(void);
+
+/**
+ * The list of module func block get functions.
+ * Entries are in the same order as the names from module_list_avail(),
+ * because module_factory() uses one index for both arrays.
+ * The last entry is NULL.
+ */
+static fbgetfunctype*
+module_funcs_avail(void)
+{
+	static fbgetfunctype getters[] = {
+		&dns64_get_funcblock,
+#ifdef WITH_PYTHONMODULE
+		&pythonmod_get_funcblock,
+#endif
+		&val_get_funcblock,
+		&iter_get_funcblock,
+		NULL
+	};
+	return getters;
+}
+
+/**
+ * Get the funcblock for the module name at the start of the string.
+ * Leading whitespace is skipped. The names from module_list_avail() are
+ * tried in order, for the first match *str is advanced past the name and
+ * the get function at the same index in module_funcs_avail() is called.
+ * NOTE(review): a name is matched as a prefix of the remaining string, the
+ * character after the name is not checked to be whitespace or the end.
+ * @param str: string with module name. Advanced on success, unchanged
+ *	when NULL is returned.
+ * @return funcblock or NULL if no name matches.
+ */
+struct
+module_func_block* module_factory(const char** str)
+{
+	int i;
+	const char* s = *str;
+	const char** names = module_list_avail();
+	fbgetfunctype* fb = module_funcs_avail();
+	/* the cast is to unsigned char, because a negative char value
+	 * passed to isspace is undefined */
+	while(*s && isspace((unsigned char)*s))
+		s++;
+	for(i=0; names[i]; i++) {
+		size_t len = strlen(names[i]);
+		if(strncmp(names[i], s, len) == 0) {
+			*str = s + len;
+			return (*fb[i])();
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Setup the modules: desetup the stack if it is not empty, configure it
+ * from the string and call the init function of every module with its
+ * index in the stack.
+ * @return false if the config fails or if a module init fails. The loop
+ *	stops at the first module init that fails.
+ */
+int
+modstack_setup(struct module_stack* stack, const char* module_conf,
+	struct module_env* env)
+{
+	int idx;
+	/* a stack that is in use is taken down first */
+	if(stack->num != 0)
+		modstack_desetup(stack, env);
+	/* fixed setup of the modules */
+	if(!modstack_config(stack, module_conf))
+		return 0;
+	/* cleared here, set by module init below */
+	env->need_to_validate = 0;
+	for(idx=0; idx<stack->num; idx++) {
+		struct module_func_block* mod = stack->mod[idx];
+		verbose(VERB_OPS, "init module %d: %s", idx, mod->name);
+		fptr_ok(fptr_whitelist_mod_init(mod->init));
+		if(!(*mod->init)(env, idx)) {
+			log_err("module init for module %s failed",
+				mod->name);
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/**
+ * Call the deinit function of every module in the stack with its index,
+ * then free the module array and make the stack empty.
+ */
+void
+modstack_desetup(struct module_stack* stack, struct module_env* env)
+{
+	int idx;
+	for(idx=0; idx<stack->num; idx++) {
+		struct module_func_block* mod = stack->mod[idx];
+		fptr_ok(fptr_whitelist_mod_deinit(mod->deinit));
+		(*mod->deinit)(env, idx);
+	}
+	/* only the array is freed, the entries are references */
+	free(stack->mod);
+	stack->mod = NULL;
+	stack->num = 0;
+}
+
+/**
+ * Find the index of the first module in the stack with exactly this name.
+ * @return -1 if there is no such module, otherwise the index number.
+ */
+int
+modstack_find(struct module_stack* stack, const char* name)
+{
+	int idx = 0;
+	while(idx < stack->num) {
+		if(strcmp(stack->mod[idx]->name, name) == 0)
+			return idx;
+		idx++;
+	}
+	return -1;
+}
diff --git a/external/unbound/services/modstack.h b/external/unbound/services/modstack.h
new file mode 100644
index 000000000..cb8613299
--- /dev/null
+++ b/external/unbound/services/modstack.h
@@ -0,0 +1,113 @@
+/*
+ * services/modstack.h - stack of modules
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains functions to help maintain a stack of modules.
+ */
+
+#ifndef SERVICES_MODSTACK_H
+#define SERVICES_MODSTACK_H
+struct module_func_block;
+struct module_env;
+
+/**
+ * Stack of modules.
+ */
+struct module_stack {
+ /** the number of modules */
+ int num;
+ /** the module callbacks, array of num length (ref only) */
+ struct module_func_block** mod;
+};
+
+/**
+ * Init a stack of modules
+ * @param stack: initialised as empty.
+ */
+void modstack_init(struct module_stack* stack);
+
+/**
+ * Read config file module settings and set up the modfunc block
+ * @param stack: the stack of modules (empty before call).
+ * @param module_conf: string what modules to insert.
+ * @return false on error
+ */
+int modstack_config(struct module_stack* stack, const char* module_conf);
+
+/**
+ * Get funcblock for module name
+ * @param str: string with module name. Advanced to next value on success.
+ * The string is assumed whitespace separated list of module names.
+ * @return funcblock or NULL on error.
+ */
+struct module_func_block* module_factory(const char** str);
+
+/**
+ * Get list of modules available.
+ * @return list of modules available. Static strings, ends with NULL.
+ */
+const char** module_list_avail(void);
+
+/**
+ * Setup modules. Assigns ids and calls module_init.
+ * @param stack: if not empty beforehand, it will be desetup()ed.
+ * It is then modstack_configged().
+ * @param module_conf: string what modules to insert.
+ * @param env: module environment which is inited by the modules.
+ * environment should have a superalloc, cfg,
+ * env.need_to_validate is set by the modules.
+ * @return false if the module config or a module init failed.
+ */
+int modstack_setup(struct module_stack* stack, const char* module_conf,
+ struct module_env* env);
+
+/**
+ * Desetup the modules, deinit, delete.
+ * @param stack: made empty.
+ * @param env: module env for module deinit() calls.
+ */
+void modstack_desetup(struct module_stack* stack, struct module_env* env);
+
+/**
+ * Find index of module by name.
+ * @param stack: to look in
+ * @param name: the name to look for
+ * @return -1 on failure, otherwise index number.
+ */
+int modstack_find(struct module_stack* stack, const char* name);
+
+#endif /* SERVICES_MODSTACK_H */
diff --git a/external/unbound/services/outbound_list.c b/external/unbound/services/outbound_list.c
new file mode 100644
index 000000000..ad73380bc
--- /dev/null
+++ b/external/unbound/services/outbound_list.c
@@ -0,0 +1,89 @@
+/*
+ * services/outbound_list.c - keep list of outbound serviced queries.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains functions to help a module keep track of the
+ * queries it has outstanding to authoritative servers.
+ */
+#include "config.h"
+#include <sys/time.h>
+#include "services/outbound_list.h"
+#include "services/outside_network.h"
+
+/* Make the list empty by clearing its head pointer. Entries that were on
+ * the list, if any, are not touched by this call. */
+void
+outbound_list_init(struct outbound_list* list)
+{
+ list->first = NULL;
+}
+
+/*
+ * Stop the serviced query of every entry on the list and leave the list
+ * empty. The entries themselves are not freed: they are in a region.
+ */
+void
+outbound_list_clear(struct outbound_list* list)
+{
+	struct outbound_entry* cur;
+	struct outbound_entry* following;
+	for(cur = list->first; cur; cur = following) {
+		/* pick up the link before the query is stopped */
+		following = cur->next;
+		outnet_serviced_query_stop(cur->qsent, cur);
+	}
+	/* same effect as outbound_list_init() */
+	list->first = NULL;
+}
+
+/*
+ * Put entry e at the front of the list. Both link fields of e are
+ * overwritten; qsent and qstate are left as the caller set them.
+ */
+void
+outbound_list_insert(struct outbound_list* list, struct outbound_entry* e)
+{
+	struct outbound_entry* old_head = list->first;
+	e->next = old_head;
+	if(old_head != NULL)
+		old_head->prev = e;
+	e->prev = NULL;
+	list->first = e;
+}
+
+/*
+ * Stop the serviced query of entry e and unlink e from the list.
+ * A NULL entry is ignored. The entry is in a region, so it is not freed.
+ */
+void
+outbound_list_remove(struct outbound_list* list, struct outbound_entry* e)
+{
+	if(e == NULL)
+		return;
+	outnet_serviced_query_stop(e->qsent, e);
+	/* successor points back past e */
+	if(e->next != NULL)
+		e->next->prev = e->prev;
+	/* predecessor, or the list head, points forward past e */
+	if(e->prev == NULL)
+		list->first = e->next;
+	else
+		e->prev->next = e->next;
+}
diff --git a/external/unbound/services/outbound_list.h b/external/unbound/services/outbound_list.h
new file mode 100644
index 000000000..ad59e42d1
--- /dev/null
+++ b/external/unbound/services/outbound_list.h
@@ -0,0 +1,105 @@
+/*
+ * services/outbound_list.h - keep list of outbound serviced queries.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains functions to help a module keep track of the
+ * queries it has outstanding to authoritative servers.
+ */
+#ifndef SERVICES_OUTBOUND_LIST_H
+#define SERVICES_OUTBOUND_LIST_H
+struct outbound_entry;
+struct serviced_query;
+struct module_qstate;
+
+/**
+ * The outbound list. This structure is part of the module specific query
+ * state.
+ */
+struct outbound_list {
+ /** The linked list of outbound query entries. Points at the first
+ * entry; NULL when the list is empty (see outbound_list_init). */
+ struct outbound_entry* first;
+};
+
+/**
+ * Outbound list entry. A serviced query sent by a module processing the
+ * query from the qstate. Double linked list to aid removal.
+ */
+struct outbound_entry {
+ /** next in list, NULL for the last entry */
+ struct outbound_entry* next;
+ /** prev in list, NULL for the first entry */
+ struct outbound_entry* prev;
+ /** The query that was sent out. It is passed to
+ * outnet_serviced_query_stop() when the entry is removed or the
+ * list is cleared. */
+ struct serviced_query* qsent;
+ /** the module query state that sent it */
+ struct module_qstate* qstate;
+};
+
+/**
+ * Init the user allocated outbound list structure
+ * @param list: the list structure.
+ */
+void outbound_list_init(struct outbound_list* list);
+
+/**
+ * Clear the user owned outbound list structure.
+ * Deletes serviced queries.
+ * @param list: the list structure. It is cleared, but the list struct itself
+ * is the caller's responsibility to delete.
+ */
+void outbound_list_clear(struct outbound_list* list);
+
+/**
+ * Insert new entry at the front of the list. The caller allocates the
+ * entry; the list functions never free it (outbound_list.c notes that
+ * entries are in a region, so this need not be malloc memory).
+ * qstate and qsent are set by caller.
+ * @param list: the list to add to.
+ * @param e: entry to add, it is only half initialised at call start, fully
+ * initialised at call end.
+ */
+void outbound_list_insert(struct outbound_list* list,
+ struct outbound_entry* e);
+
+/**
+ * Remove an entry from the list.
+ * Deletes (stops) the serviced query in the entry. The memory of the entry
+ * itself is not freed by this call. A NULL entry is ignored.
+ * @param list: the list to remove from.
+ * @param e: the entry to remove.
+ */
+void outbound_list_remove(struct outbound_list* list,
+ struct outbound_entry* e);
+
+#endif /* SERVICES_OUTBOUND_LIST_H */
diff --git a/external/unbound/services/outside_network.c b/external/unbound/services/outside_network.c
new file mode 100644
index 000000000..986c53dd8
--- /dev/null
+++ b/external/unbound/services/outside_network.c
@@ -0,0 +1,2052 @@
+/*
+ * services/outside_network.c - implement sending of queries and wait answer.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file has functions to send queries to authoritative servers and
+ * wait for the pending answer events.
+ */
+#include "config.h"
+#include <ctype.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#include <sys/time.h>
+#include "services/outside_network.h"
+#include "services/listen_dnsport.h"
+#include "services/cache/infra.h"
+#include "util/data/msgparse.h"
+#include "util/data/msgreply.h"
+#include "util/data/msgencode.h"
+#include "util/data/dname.h"
+#include "util/netevent.h"
+#include "util/log.h"
+#include "util/net_help.h"
+#include "util/random.h"
+#include "util/fptr_wlist.h"
+#include "ldns/sbuffer.h"
+#include "dnstap/dnstap.h"
+#ifdef HAVE_OPENSSL_SSL_H
+#include <openssl/ssl.h>
+#endif
+
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+#include <fcntl.h>
+
+/** number of times to retry making a random ID that is unique.
+ * When this many new IDs in a row collide with one already in the pending
+ * tree, select_id() logs an error and fails. */
+#define MAX_ID_RETRY 1000
+/** number of times to retry finding interface, port that can be opened. */
+#define MAX_PORT_RETRY 10000
+/** number of retries on outgoing UDP queries */
+#define OUTBOUND_UDP_RETRY 1
+
+/** initiate TCP transaction for serviced query */
+static void serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff);
+/** with a fd available, randomize and send UDP */
+static int randomize_and_send_udp(struct pending* pend, sldns_buffer* packet,
+ int timeout);
+
+/** remove waiting tcp from the outnet waiting list */
+static void waiting_list_remove(struct outside_network* outnet,
+ struct waiting_tcp* w);
+
+/**
+ * Compare two struct pending entries: first on query ID, and for equal IDs
+ * on the address (with sockaddr_cmp).
+ * @return -1, 0 or 1 style ordering result.
+ */
+int
+pending_cmp(const void* key1, const void* key2)
+{
+	struct pending* a = (struct pending*)key1;
+	struct pending* b = (struct pending*)key2;
+	if(a->id != b->id) {
+		if(a->id < b->id)
+			return -1;
+		return 1;
+	}
+	log_assert(a->id == b->id);
+	/* same ID, so the address decides */
+	return sockaddr_cmp(&a->addr, a->addrlen, &b->addr, b->addrlen);
+}
+
+/**
+ * Compare two serviced queries. The order of the checks is: buffer length,
+ * the 10 bytes in front of the qname, the final 4 bytes (type and class),
+ * the dnssec value, the qname itself and finally the address.
+ * @return ordering result, 0 if the queries are the same.
+ */
+int
+serviced_cmp(const void* key1, const void* key2)
+{
+	struct serviced_query* a = (struct serviced_query*)key1;
+	struct serviced_query* b = (struct serviced_query*)key2;
+	int c;
+	if(a->qbuflen != b->qbuflen)
+		return (a->qbuflen < b->qbuflen) ? -1 : 1;
+	log_assert(a->qbuflen == b->qbuflen);
+	log_assert(a->qbuflen >= 15 /* 10 header, root, type, class */);
+	/* the qname is not compared bytewise here, because
+	 * alternate casing of qname is still the same query */
+	c = memcmp(a->qbuf, b->qbuf, 10);
+	if(c != 0)
+		return c;
+	c = memcmp(a->qbuf+a->qbuflen-4, b->qbuf+b->qbuflen-4, 4);
+	if(c != 0)
+		return c;
+	if(a->dnssec != b->dnssec)
+		return (a->dnssec < b->dnssec) ? -1 : 1;
+	c = query_dname_compare(a->qbuf+10, b->qbuf+10);
+	if(c != 0)
+		return c;
+	return sockaddr_cmp(&a->addr, a->addrlen, &b->addr, b->addrlen);
+}
+
+/**
+ * Free a waiting_tcp entry together with its timer, if it has one.
+ * The entry is not unlinked from the waiting list by this function.
+ * @param w: to delete, may be NULL.
+ */
+static void
+waiting_tcp_delete(struct waiting_tcp* w)
+{
+	if(w != NULL) {
+		if(w->timer != NULL)
+			comm_timer_delete(w->timer);
+		free(w);
+	}
+}
+
+/**
+ * Pick random outgoing-interface of that family, and bind it.
+ * port set to 0 so OS picks a port number for us.
+ * if it is the ANY address, do not bind.
+ * The interface is drawn with ub_random_max() from the ip6_ifs or the
+ * ip4_ifs array of w->outnet, depending on the family of w->addr.
+ * The port in the address of the chosen interface is set to 0 in place,
+ * in the port_if structure itself, before bind() is called.
+ * @param w: tcp structure with destination address.
+ * @param s: socket fd.
+ * @return false on error, socket closed.
+ */
+static int
+pick_outgoing_tcp(struct waiting_tcp* w, int s)
+{
+ struct port_if* pi = NULL;
+ int num;
+ /* count the outgoing interfaces of the destination's family */
+#ifdef INET6
+ if(addr_is_ip6(&w->addr, w->addrlen))
+ num = w->outnet->num_ip6;
+ else
+#endif
+ num = w->outnet->num_ip4;
+ if(num == 0) {
+ /* no interface of this family, close the socket and fail */
+ log_err("no TCP outgoing interfaces of family");
+ log_addr(VERB_OPS, "for addr", &w->addr, w->addrlen);
+#ifndef USE_WINSOCK
+ close(s);
+#else
+ closesocket(s);
+#endif
+ return 0;
+ }
+ /* random choice among the num interfaces of that family */
+#ifdef INET6
+ if(addr_is_ip6(&w->addr, w->addrlen))
+ pi = &w->outnet->ip6_ifs[ub_random_max(w->outnet->rnd, num)];
+ else
+#endif
+ pi = &w->outnet->ip4_ifs[ub_random_max(w->outnet->rnd, num)];
+ log_assert(pi);
+ if(addr_is_any(&pi->addr, pi->addrlen)) {
+ /* binding to the ANY interface is for listening sockets */
+ return 1;
+ }
+ /* set port to 0 */
+ if(addr_is_ip6(&pi->addr, pi->addrlen))
+ ((struct sockaddr_in6*)&pi->addr)->sin6_port = 0;
+ else ((struct sockaddr_in*)&pi->addr)->sin_port = 0;
+ if(bind(s, (struct sockaddr*)&pi->addr, pi->addrlen) != 0) {
+ /* log the platform specific error and close the socket */
+#ifndef USE_WINSOCK
+ log_err("outgoing tcp: bind: %s", strerror(errno));
+ close(s);
+#else
+ log_err("outgoing tcp: bind: %s",
+ wsa_strerror(WSAGetLastError()));
+ closesocket(s);
+#endif
+ return 0;
+ }
+ log_addr(VERB_ALGO, "tcp bound to src", &pi->addr, pi->addrlen);
+ return 1;
+}
+
+/**
+ * Use next free buffer to service a tcp query.
+ * Takes the first pending_tcp of w->outnet->tcp_free (which must exist),
+ * opens a nonblocking TCP socket towards w->addr, optionally sets up SSL,
+ * copies the packet into the commpoint buffer and starts listening on it.
+ * The pending_tcp is only taken off the free list when all of that worked.
+ * @param w: the waiting tcp query, with outnet and destination address.
+ * @param pkt: the query packet to send.
+ * @param pkt_len: length of pkt.
+ * @return 1 on success. 0 on failure; w is not freed and the free list is
+ * unchanged in that case.
+ */
+static int
+outnet_tcp_take_into_use(struct waiting_tcp* w, uint8_t* pkt, size_t pkt_len)
+{
+ struct pending_tcp* pend = w->outnet->tcp_free;
+ int s;
+ log_assert(pend);
+ log_assert(pkt);
+ log_assert(w->addrlen > 0);
+ /* open socket */
+#ifdef INET6
+ if(addr_is_ip6(&w->addr, w->addrlen))
+ s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
+ else
+#endif
+ s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if(s == -1) {
+#ifndef USE_WINSOCK
+ log_err_addr("outgoing tcp: socket", strerror(errno),
+ &w->addr, w->addrlen);
+#else
+ log_err_addr("outgoing tcp: socket",
+ wsa_strerror(WSAGetLastError()), &w->addr, w->addrlen);
+#endif
+ return 0;
+ }
+ /* on failure pick_outgoing_tcp has already closed the socket */
+ if(!pick_outgoing_tcp(w, s))
+ return 0;
+
+ fd_set_nonblock(s);
+ /* nonblocking connect: an 'in progress' result is not a failure */
+ if(connect(s, (struct sockaddr*)&w->addr, w->addrlen) == -1) {
+#ifndef USE_WINSOCK
+#ifdef EINPROGRESS
+ if(errno != EINPROGRESS) {
+#else
+ if(1) {
+#endif
+ if(tcp_connect_errno_needs_log(
+ (struct sockaddr*)&w->addr, w->addrlen))
+ log_err_addr("outgoing tcp: connect",
+ strerror(errno), &w->addr, w->addrlen);
+ close(s);
+#else /* USE_WINSOCK */
+ if(WSAGetLastError() != WSAEINPROGRESS &&
+ WSAGetLastError() != WSAEWOULDBLOCK) {
+ closesocket(s);
+#endif
+ return 0;
+ }
+ }
+ /* SSL is only used if there is a context and w asks for it */
+ if(w->outnet->sslctx && w->ssl_upstream) {
+ pend->c->ssl = outgoing_ssl_fd(w->outnet->sslctx, s);
+ if(!pend->c->ssl) {
+ /* presumably the fd is set so that comm_point_close
+ * closes the socket - confirm in netevent */
+ pend->c->fd = s;
+ comm_point_close(pend->c);
+ return 0;
+ }
+#ifdef USE_WINSOCK
+ comm_point_tcp_win_bio_cb(pend->c, pend->c->ssl);
+#endif
+ pend->c->ssl_shake_state = comm_ssl_shake_write;
+ }
+ w->pkt = NULL;
+ /* the next_waiting field is reused to point at the pending_tcp */
+ w->next_waiting = (void*)pend;
+ pend->id = LDNS_ID_WIRE(pkt);
+ w->outnet->num_tcp_outgoing++;
+ /* take pend off the free list and tie it to the query */
+ w->outnet->tcp_free = pend->next_free;
+ pend->next_free = NULL;
+ pend->query = w;
+ pend->c->repinfo.addrlen = w->addrlen;
+ memcpy(&pend->c->repinfo.addr, &w->addr, w->addrlen);
+ /* put the packet in the commpoint buffer, ready for writing */
+ sldns_buffer_clear(pend->c->buffer);
+ sldns_buffer_write(pend->c->buffer, pkt, pkt_len);
+ sldns_buffer_flip(pend->c->buffer);
+ pend->c->tcp_is_reading = 0;
+ pend->c->tcp_byte_count = 0;
+ comm_point_start_listening(pend->c, s, -1);
+ return 1;
+}
+
+/**
+ * Hand free TCP buffers to waiting TCP queries, for as long as there are
+ * both free buffers and waiting queries and no quit was requested.
+ * A query that cannot be started is deleted and its callback is called
+ * with NETEVENT_CLOSED.
+ */
+static void
+use_free_buffer(struct outside_network* outnet)
+{
+	for(;;) {
+		struct waiting_tcp* head;
+		comm_point_callback_t* notify;
+		void* notify_arg;
+		if(!outnet->tcp_free || !outnet->tcp_wait_first
+			|| outnet->want_to_quit)
+			break;
+		/* take the first waiting query off the list */
+		head = outnet->tcp_wait_first;
+		outnet->tcp_wait_first = head->next_waiting;
+		if(outnet->tcp_wait_last == head)
+			outnet->tcp_wait_last = NULL;
+		if(outnet_tcp_take_into_use(head, head->pkt, head->pkt_len))
+			continue;
+		/* failed; copy the callback out before the entry is freed */
+		notify = head->cb;
+		notify_arg = head->cb_arg;
+		waiting_tcp_delete(head);
+		fptr_ok(fptr_whitelist_pending_tcp(notify));
+		(void)(*notify)(NULL, notify_arg, NETEVENT_CLOSED, NULL);
+	}
+}
+
+/**
+ * Decommission a tcp buffer, closes commpoint and frees waiting_tcp entry.
+ * The pending_tcp is put back at the front of the outnet->tcp_free list.
+ * After that use_free_buffer() is called, so that a waiting query can use
+ * the buffer that became free.
+ * @param outnet: the outside network that owns the buffer.
+ * @param pend: the tcp buffer that is done.
+ */
+static void
+decomission_pending_tcp(struct outside_network* outnet,
+ struct pending_tcp* pend)
+{
+ if(pend->c->ssl) {
+ /* without HAVE_SSL the ssl pointer is left as it is */
+#ifdef HAVE_SSL
+ SSL_shutdown(pend->c->ssl);
+ SSL_free(pend->c->ssl);
+ pend->c->ssl = NULL;
+#endif
+ }
+ comm_point_close(pend->c);
+ /* back on the free list */
+ pend->next_free = outnet->tcp_free;
+ outnet->tcp_free = pend;
+ waiting_tcp_delete(pend->query);
+ pend->query = NULL;
+ use_free_buffer(outnet);
+}
+
+/**
+ * Callback of the outgoing TCP commpoints (see create_pending_tcp).
+ * A reply without error must hold the ID that was sent, otherwise the
+ * event is turned into NETEVENT_CLOSED. The callback of the query is called
+ * with the resulting event, and the tcp buffer is decommissioned.
+ * @param c: the commpoint.
+ * @param arg: the struct pending_tcp for this commpoint.
+ * @param error: NETEVENT_NOERROR or the error event.
+ * @param reply_info: passed on to the callback of the query.
+ * @return always 0.
+ */
+int
+outnet_tcp_cb(struct comm_point* c, void* arg, int error,
+	struct comm_reply *reply_info)
+{
+	struct pending_tcp* conn = (struct pending_tcp*)arg;
+	struct outside_network* outnet = conn->query->outnet;
+	verbose(VERB_ALGO, "outnettcp cb");
+	if(error == NETEVENT_NOERROR) {
+		/* check ID */
+		if(sldns_buffer_limit(c->buffer) < sizeof(uint16_t) ||
+			LDNS_ID_WIRE(sldns_buffer_begin(c->buffer))
+			!= conn->id) {
+			log_addr(VERB_QUERY,
+				"outnettcp: bad ID in reply, from:",
+				&conn->query->addr, conn->query->addrlen);
+			error = NETEVENT_CLOSED;
+		}
+	} else {
+		/* the error is passed on to the callback below */
+		verbose(VERB_QUERY, "outnettcp got tcp error %d", error);
+	}
+	fptr_ok(fptr_whitelist_pending_tcp(conn->query->cb));
+	(void)(*conn->query->cb)(c, conn->query->cb_arg, error, reply_info);
+	decomission_pending_tcp(outnet, conn);
+	return 0;
+}
+
+/**
+ * Lower the use count of a port commpoint. When the count drops to zero
+ * the commpoint is closed, its port number is given back to the interface
+ * and the port_comm is moved to the unused list of the outnet.
+ * @param outnet: the outside network, for the unused list.
+ * @param pc: the port commpoint that has one user less.
+ */
+static void
+portcomm_loweruse(struct outside_network* outnet, struct port_comm* pc)
+{
+	struct port_if* iface;
+	pc->num_outstanding -= 1;
+	if(pc->num_outstanding > 0)
+		return; /* others still use this port */
+	verbose(VERB_ALGO, "close of port %d", pc->number);
+	comm_point_close(pc->cp);
+	iface = pc->pif;
+	log_assert(iface->inuse > 0);
+	/* the port number goes back into the avail_ports array */
+	iface->avail_ports[iface->avail_total - iface->inuse] = pc->number;
+	iface->inuse -= 1;
+	/* the last in-use entry of out[] moves to the slot of pc */
+	iface->out[pc->index] = iface->out[iface->inuse];
+	iface->out[pc->index]->index = pc->index;
+	/* and pc itself goes on the unused list */
+	pc->next = outnet->unused_fds;
+	outnet->unused_fds = pc;
+}
+
+/**
+ * Try to send waiting UDP queries.
+ * Runs for as long as there are waiting queries and unused port commpoints
+ * and no quit was requested. Every query is taken off the waiting list, its
+ * stored packet is moved into the shared udp buffer and it is sent with
+ * randomize_and_send_udp(). If sending fails, the callback of the query is
+ * called with NETEVENT_CLOSED and the query is deleted.
+ * @param outnet: the outside network with the waiting list.
+ */
+static void
+outnet_send_wait_udp(struct outside_network* outnet)
+{
+	struct pending* pend;
+	/* process waiting queries */
+	while(outnet->udp_wait_first && outnet->unused_fds
+		&& !outnet->want_to_quit) {
+		pend = outnet->udp_wait_first;
+		outnet->udp_wait_first = pend->next_waiting;
+		if(!pend->next_waiting) outnet->udp_wait_last = NULL;
+		/* The entry is off the waiting list now, so drop the link.
+		 * pending_delete() takes a nonzero next_waiting to mean
+		 * 'on the waiting list' and would walk the whole list
+		 * searching for an entry that is not there. */
+		pend->next_waiting = NULL;
+		sldns_buffer_clear(outnet->udp_buff);
+		sldns_buffer_write(outnet->udp_buff, pend->pkt, pend->pkt_len);
+		sldns_buffer_flip(outnet->udp_buff);
+		free(pend->pkt); /* freeing now makes get_mem correct */
+		pend->pkt = NULL;
+		pend->pkt_len = 0;
+		if(!randomize_and_send_udp(pend, outnet->udp_buff,
+			pend->timeout)) {
+			/* callback error on pending */
+			if(pend->cb) {
+				fptr_ok(fptr_whitelist_pending_udp(pend->cb));
+				(void)(*pend->cb)(outnet->unused_fds->cp, pend->cb_arg,
+					NETEVENT_CLOSED, NULL);
+			}
+			pending_delete(outnet, pend);
+		}
+	}
+}
+
+/**
+ * Count a UDP reply that is not wanted. If a threshold is set and the
+ * running total reaches it, a warning is logged, the unwanted_action is
+ * called and the running total starts again at zero.
+ * @param outnet: the outside network with the counters and the action.
+ */
+static void
+outnet_unwanted_udp_reply(struct outside_network* outnet)
+{
+	outnet->unwanted_replies++;
+	if(outnet->unwanted_threshold && ++outnet->unwanted_total
+		>= outnet->unwanted_threshold) {
+		log_warn("unwanted reply total reached threshold (%u)"
+			" you may be under attack."
+			" defensive action: clearing the cache",
+			(unsigned)outnet->unwanted_threshold);
+		fptr_ok(fptr_whitelist_alloc_cleanup(
+			outnet->unwanted_action));
+		(*outnet->unwanted_action)(outnet->unwanted_param);
+		outnet->unwanted_total = 0;
+	}
+}
+
+/**
+ * Callback of the outgoing UDP commpoints (see outside_network_create).
+ * The reply is looked up in the pending tree by ID and source address.
+ * Replies that match nothing, or that arrive on another port than the one
+ * the query was sent from, are counted as unwanted and dropped. For a
+ * wanted reply the timer is disabled, the callback of the query is called
+ * with NETEVENT_NOERROR, the port is released, the pending entry deleted
+ * and the UDP waiting list serviced.
+ * @param c: commpoint with the reply in its buffer.
+ * @param arg: the struct outside_network.
+ * @param error: NETEVENT_NOERROR, other values are logged and ignored.
+ * @param reply_info: has the source address of the reply.
+ * @return always 0.
+ */
+int
+outnet_udp_cb(struct comm_point* c, void* arg, int error,
+	struct comm_reply *reply_info)
+{
+	struct outside_network* outnet = (struct outside_network*)arg;
+	struct pending key;
+	struct pending* p;
+	verbose(VERB_ALGO, "answer cb");
+
+	if(error != NETEVENT_NOERROR) {
+		verbose(VERB_QUERY, "outnetudp got udp error %d", error);
+		return 0;
+	}
+	if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
+		verbose(VERB_QUERY, "outnetudp udp too short");
+		return 0;
+	}
+	log_assert(reply_info);
+
+	/* setup lookup key */
+	key.id = (unsigned)LDNS_ID_WIRE(sldns_buffer_begin(c->buffer));
+	memcpy(&key.addr, &reply_info->addr, reply_info->addrlen);
+	key.addrlen = reply_info->addrlen;
+	verbose(VERB_ALGO, "Incoming reply id = %4.4x", key.id);
+	log_addr(VERB_ALGO, "Incoming reply addr =",
+		&reply_info->addr, reply_info->addrlen);
+
+	/* find it, see if this thing is a valid query response */
+	verbose(VERB_ALGO, "lookup size is %d entries", (int)outnet->pending->count);
+	p = (struct pending*)rbtree_search(outnet->pending, &key);
+	if(!p) {
+		verbose(VERB_QUERY, "received unwanted or unsolicited udp reply dropped.");
+		log_buf(VERB_ALGO, "dropped message", c->buffer);
+		outnet_unwanted_udp_reply(outnet);
+		return 0;
+	}
+
+	verbose(VERB_ALGO, "received udp reply.");
+	log_buf(VERB_ALGO, "udp message", c->buffer);
+	if(p->pc->cp != c) {
+		verbose(VERB_QUERY, "received reply id,addr on wrong port. "
+			"dropped.");
+		outnet_unwanted_udp_reply(outnet);
+		return 0;
+	}
+	comm_timer_disable(p->timer);
+	verbose(VERB_ALGO, "outnet handle udp reply");
+	/* delete from tree first in case callback creates a retry */
+	(void)rbtree_delete(outnet->pending, p->node.key);
+	if(p->cb) {
+		fptr_ok(fptr_whitelist_pending_udp(p->cb));
+		(void)(*p->cb)(p->pc->cp, p->cb_arg, NETEVENT_NOERROR, reply_info);
+	}
+	portcomm_loweruse(outnet, p->pc);
+	/* already out of the tree, so delete without the outnet */
+	pending_delete(NULL, p);
+	outnet_send_wait_udp(outnet);
+	return 0;
+}
+
+/**
+ * Calculate the number of ip4 and ip6 outgoing interfaces.
+ * Without an interface list (num_ifs zero or less) there is one interface
+ * for every family that is enabled. With a list, every entry is counted
+ * for its own family, but only if that family is enabled.
+ * @param ifs: interface address strings.
+ * @param num_ifs: number of entries in ifs.
+ * @param do_ip4: if ip4 is enabled.
+ * @param do_ip6: if ip6 is enabled.
+ * @param num_ip4: returns the number of ip4 interfaces.
+ * @param num_ip6: returns the number of ip6 interfaces.
+ */
+static void
+calc_num46(char** ifs, int num_ifs, int do_ip4, int do_ip6,
+	int* num_ip4, int* num_ip6)
+{
+	int idx;
+	*num_ip4 = 0;
+	*num_ip6 = 0;
+	if(num_ifs <= 0) {
+		*num_ip4 = do_ip4 ? 1 : 0;
+		*num_ip6 = do_ip6 ? 1 : 0;
+		return;
+	}
+	for(idx = 0; idx < num_ifs; idx++) {
+		int is6 = str_is_ip6(ifs[idx]);
+		if(is6 && do_ip6)
+			*num_ip6 += 1;
+		else if(!is6 && do_ip4)
+			*num_ip4 += 1;
+	}
+}
+
+/**
+ * Timer callback for the delayed close of a UDP query that timed out.
+ * pending_udp_timer_cb() installs this callback when delayclose is enabled;
+ * the callback of the query has been called by then. What remains is to
+ * release the port, delete the pending entry and service the waiting list.
+ * @param arg: the struct pending.
+ */
+void
+pending_udp_timer_delay_cb(void* arg)
+{
+ struct pending* p = (struct pending*)arg;
+ struct outside_network* outnet = p->outnet;
+ verbose(VERB_ALGO, "timeout udp with delay");
+ portcomm_loweruse(outnet, p->pc);
+ pending_delete(outnet, p);
+ outnet_send_wait_udp(outnet);
+}
+
+/**
+ * Timer callback for a UDP query that got no reply in time.
+ * The callback of the query is called with NETEVENT_TIMEOUT. After that
+ * the port is either released at once, or, with delayclose, kept open
+ * until the timer fires again for pending_udp_timer_delay_cb().
+ * @param arg: the struct pending that timed out.
+ */
+void
+pending_udp_timer_cb(void *arg)
+{
+	struct pending* p = (struct pending*)arg;
+	struct outside_network* outnet = p->outnet;
+	/* it timed out */
+	verbose(VERB_ALGO, "timeout udp");
+	if(p->cb) {
+		fptr_ok(fptr_whitelist_pending_udp(p->cb));
+		(void)(*p->cb)(p->pc->cp, p->cb_arg, NETEVENT_TIMEOUT, NULL);
+	}
+	/* Without delayclose the port is closed now. The same is done when
+	 * queries are on the udp waiting list: demand for sockets is then
+	 * higher than supply, and servicing those queries is more
+	 * important than keeping the port open (customer service more
+	 * important than portICMPs). */
+	if(!outnet->delayclose || outnet->udp_wait_first) {
+		portcomm_loweruse(outnet, p->pc);
+		pending_delete(outnet, p);
+		outnet_send_wait_udp(outnet);
+		return;
+	}
+	/* delayclose, keep the port open for a longer time */
+	p->cb = NULL;
+	p->timer->callback = &pending_udp_timer_delay_cb;
+	comm_timer_set(p->timer, &outnet->delay_tv);
+}
+
+/**
+ * Create the outnet->num_tcp pending_tcp buffers. Every buffer gets an
+ * outgoing TCP commpoint with outnet_tcp_cb as callback and is put on the
+ * tcp_free list.
+ * @param outnet: the outside network, num_tcp and base are set.
+ * @param bufsize: buffer size for the commpoints.
+ * @return 0 on allocation failure. What was created up to then stays in
+ *	outnet->tcp_conns.
+ */
+static int
+create_pending_tcp(struct outside_network* outnet, size_t bufsize)
+{
+	size_t n;
+	if(outnet->num_tcp == 0)
+		return 1; /* no tcp needed, nothing to do */
+	outnet->tcp_conns = (struct pending_tcp **)calloc(
+		outnet->num_tcp, sizeof(struct pending_tcp*));
+	if(outnet->tcp_conns == NULL)
+		return 0;
+	for(n = 0; n < outnet->num_tcp; n++) {
+		struct pending_tcp* conn = (struct pending_tcp*)calloc(1,
+			sizeof(struct pending_tcp));
+		outnet->tcp_conns[n] = conn;
+		if(conn == NULL)
+			return 0;
+		/* on the free list, before the commpoint is made */
+		conn->next_free = outnet->tcp_free;
+		outnet->tcp_free = conn;
+		conn->c = comm_point_create_tcp_out(outnet->base, bufsize,
+			outnet_tcp_cb, conn);
+		if(conn->c == NULL)
+			return 0;
+	}
+	return 1;
+}
+
+/**
+ * Setup an outgoing interface, ready address.
+ * @param pif: the interface structure to fill.
+ * @param addrstr: address of the interface as a string.
+ * @param avail: array of available port numbers, it is copied.
+ * @param numavail: number of entries in avail.
+ * @param numfd: size of the out array, also stored as maxout.
+ * @return 0 if an allocation fails or the address cannot be converted.
+ *	Parts that were allocated stay in pif in that case.
+ */
+static int setup_if(struct port_if* pif, const char* addrstr,
+	int* avail, int numavail, size_t numfd)
+{
+	size_t nbytes = (size_t)numavail*sizeof(int);
+	pif->avail_total = numavail;
+	pif->avail_ports = (int*)memdup(avail, nbytes);
+	if(pif->avail_ports == NULL)
+		return 0;
+	if(!ipstrtoaddr(addrstr, UNBOUND_DNS_PORT, &pif->addr, &pif->addrlen))
+		return 0;
+	pif->maxout = (int)numfd;
+	pif->inuse = 0;
+	pif->out = (struct port_comm**)calloc(numfd,
+		sizeof(struct port_comm*));
+	return (pif->out != NULL) ? 1 : 0;
+}
+
+/**
+ * Create the outside network structure.
+ * On every failure the partly built structure is released with
+ * outside_network_delete() and NULL is returned.
+ * @param base: comm base, stored and used to create the commpoints.
+ * @param bufsize: size of the shared udp buffer and of the tcp buffers.
+ * @param num_ports: number of udp port commpoints to create. Also the
+ *	size of the out array of every interface.
+ * @param ifs: outgoing interface address strings.
+ * @param num_ifs: number of entries in ifs. If zero or less, the any
+ *	address ("0.0.0.0", "::") is used for the enabled families; that
+ *	is the same rule calc_num46() uses to count the interfaces.
+ * @param do_ip4: if ip4 is enabled.
+ * @param do_ip6: if ip6 is enabled. Forced off without INET6.
+ * @param num_tcp: number of outgoing tcp buffers to create.
+ * @param infra: stored in the structure.
+ * @param rnd: stored in the structure.
+ * @param use_caps_for_id: stored in the structure.
+ * @param availports: array of available port numbers, copied for every
+ *	interface.
+ * @param numavailports: number of entries in availports, 0 is an error.
+ * @param unwanted_threshold: stored; threshold for unwanted replies.
+ * @param unwanted_action: stored; called when the threshold is reached.
+ * @param unwanted_param: stored; argument for unwanted_action.
+ * @param do_udp: stored in the structure.
+ * @param sslctx: stored in the structure.
+ * @param delayclose: if not 0, delayclose is enabled with this delay in
+ *	milliseconds.
+ * @param dtenv: stored if compiled with USE_DNSTAP, otherwise unused.
+ * @return the new structure, or NULL on error.
+ */
+struct outside_network*
+outside_network_create(struct comm_base *base, size_t bufsize,
+	size_t num_ports, char** ifs, int num_ifs, int do_ip4,
+	int do_ip6, size_t num_tcp, struct infra_cache* infra,
+	struct ub_randstate* rnd, int use_caps_for_id, int* availports,
+	int numavailports, size_t unwanted_threshold,
+	void (*unwanted_action)(void*), void* unwanted_param, int do_udp,
+	void* sslctx, int delayclose, struct dt_env* dtenv)
+{
+	struct outside_network* outnet = (struct outside_network*)
+		calloc(1, sizeof(struct outside_network));
+	size_t k;
+	if(!outnet) {
+		log_err("malloc failed");
+		return NULL;
+	}
+	comm_base_timept(base, &outnet->now_secs, &outnet->now_tv);
+	outnet->base = base;
+	outnet->num_tcp = num_tcp;
+	outnet->num_tcp_outgoing = 0;
+	outnet->infra = infra;
+	outnet->rnd = rnd;
+	outnet->sslctx = sslctx;
+#ifdef USE_DNSTAP
+	outnet->dtenv = dtenv;
+#else
+	(void)dtenv;
+#endif
+	outnet->svcd_overhead = 0;
+	outnet->want_to_quit = 0;
+	outnet->unwanted_threshold = unwanted_threshold;
+	outnet->unwanted_action = unwanted_action;
+	outnet->unwanted_param = unwanted_param;
+	outnet->use_caps_for_id = use_caps_for_id;
+	outnet->do_udp = do_udp;
+#ifndef S_SPLINT_S
+	if(delayclose) {
+		/* delayclose is in msec, split it for the timeval */
+		outnet->delayclose = 1;
+		outnet->delay_tv.tv_sec = delayclose/1000;
+		outnet->delay_tv.tv_usec = (delayclose%1000)*1000;
+	}
+#endif
+	if(numavailports == 0) {
+		log_err("no outgoing ports available");
+		outside_network_delete(outnet);
+		return NULL;
+	}
+#ifndef INET6
+	do_ip6 = 0;
+#endif
+	calc_num46(ifs, num_ifs, do_ip4, do_ip6,
+		&outnet->num_ip4, &outnet->num_ip6);
+	if(outnet->num_ip4 != 0) {
+		if(!(outnet->ip4_ifs = (struct port_if*)calloc(
+			(size_t)outnet->num_ip4, sizeof(struct port_if)))) {
+			log_err("malloc failed");
+			outside_network_delete(outnet);
+			return NULL;
+		}
+	}
+	if(outnet->num_ip6 != 0) {
+		if(!(outnet->ip6_ifs = (struct port_if*)calloc(
+			(size_t)outnet->num_ip6, sizeof(struct port_if)))) {
+			log_err("malloc failed");
+			outside_network_delete(outnet);
+			return NULL;
+		}
+	}
+	if(	!(outnet->udp_buff = sldns_buffer_new(bufsize)) ||
+		!(outnet->pending = rbtree_create(pending_cmp)) ||
+		!(outnet->serviced = rbtree_create(serviced_cmp)) ||
+		!create_pending_tcp(outnet, bufsize)) {
+		log_err("malloc failed");
+		outside_network_delete(outnet);
+		return NULL;
+	}
+
+	/* allocate commpoints */
+	for(k=0; k<num_ports; k++) {
+		struct port_comm* pc;
+		pc = (struct port_comm*)calloc(1, sizeof(*pc));
+		if(!pc) {
+			log_err("malloc failed");
+			outside_network_delete(outnet);
+			return NULL;
+		}
+		pc->cp = comm_point_create_udp(outnet->base, -1,
+			outnet->udp_buff, outnet_udp_cb, outnet);
+		if(!pc->cp) {
+			log_err("malloc failed");
+			/* pc is not on the unused list yet, free it here */
+			free(pc);
+			outside_network_delete(outnet);
+			return NULL;
+		}
+		pc->next = outnet->unused_fds;
+		outnet->unused_fds = pc;
+	}
+
+	/* allocate interfaces */
+	if(num_ifs <= 0) {
+		/* Same test as in calc_num46(), which counted one interface
+		 * per enabled family for this case. With '== 0' a negative
+		 * count would leave those interfaces without setup. */
+		if(do_ip4 && !setup_if(&outnet->ip4_ifs[0], "0.0.0.0",
+			availports, numavailports, num_ports)) {
+			log_err("malloc failed");
+			outside_network_delete(outnet);
+			return NULL;
+		}
+		if(do_ip6 && !setup_if(&outnet->ip6_ifs[0], "::",
+			availports, numavailports, num_ports)) {
+			log_err("malloc failed");
+			outside_network_delete(outnet);
+			return NULL;
+		}
+	} else {
+		size_t done_4 = 0, done_6 = 0;
+		int i;
+		for(i=0; i<num_ifs; i++) {
+			if(str_is_ip6(ifs[i]) && do_ip6) {
+				if(!setup_if(&outnet->ip6_ifs[done_6], ifs[i],
+					availports, numavailports, num_ports)){
+					log_err("malloc failed");
+					outside_network_delete(outnet);
+					return NULL;
+				}
+				done_6++;
+			}
+			if(!str_is_ip6(ifs[i]) && do_ip4) {
+				if(!setup_if(&outnet->ip4_ifs[done_4], ifs[i],
+					availports, numavailports, num_ports)){
+					log_err("malloc failed");
+					outside_network_delete(outnet);
+					return NULL;
+				}
+				done_4++;
+			}
+		}
+	}
+	return outnet;
+}
+
+/**
+ * Helper for tree traversal: delete the pending entry of a tree node.
+ * @param node: the node, it is the struct pending.
+ * @param arg: the outside network, or NULL; passed to pending_delete().
+ */
+static void
+pending_node_del(rbnode_t* node, void* arg)
+{
+	pending_delete((struct outside_network*)arg, (struct pending*)node);
+}
+
+/**
+ * Helper for tree traversal: free a serviced query with its query buffer,
+ * zone and list of callbacks.
+ * @param node: the node, it is the struct serviced_query.
+ * @param arg: not used.
+ */
+static void
+serviced_node_del(rbnode_t* node, void* ATTR_UNUSED(arg))
+{
+	struct serviced_query* sq = (struct serviced_query*)node;
+	struct service_callback* cb;
+	struct service_callback* following;
+	cb = sq->cblist;
+	free(sq->qbuf);
+	free(sq->zone);
+	for(; cb; cb = following) {
+		following = cb->next;
+		free(cb);
+	}
+	free(sq);
+}
+
+/**
+ * Prepare for quit: set the want_to_quit flag, so that queued items are
+ * not sent any more.
+ * @param outnet: the outside network, NULL is ignored.
+ */
+void
+outside_network_quit_prepare(struct outside_network* outnet)
+{
+	/* the flag stops the loops that send the waiting queries */
+	if(outnet != NULL)
+		outnet->want_to_quit = 1;
+}
+
+/**
+ * Delete the outside network structure and everything in it: the pending
+ * and serviced trees, the udp buffer, the unused and the in-use port
+ * commpoints, the interfaces, the tcp buffers and both waiting lists.
+ * Also used on a partly created structure, so every member is checked.
+ * @param outnet: to delete, NULL is ignored.
+ */
+void
+outside_network_delete(struct outside_network* outnet)
+{
+ if(!outnet)
+ return;
+ outnet->want_to_quit = 1;
+ /* check every element, since we can be called on malloc error */
+ if(outnet->pending) {
+ /* free pending elements, but do no unlink from tree. */
+ traverse_postorder(outnet->pending, pending_node_del, NULL);
+ free(outnet->pending);
+ }
+ if(outnet->serviced) {
+ traverse_postorder(outnet->serviced, serviced_node_del, NULL);
+ free(outnet->serviced);
+ }
+ if(outnet->udp_buff)
+ sldns_buffer_free(outnet->udp_buff);
+ /* port commpoints that are not in use */
+ if(outnet->unused_fds) {
+ struct port_comm* p = outnet->unused_fds, *np;
+ while(p) {
+ np = p->next;
+ comm_point_delete(p->cp);
+ free(p);
+ p = np;
+ }
+ outnet->unused_fds = NULL;
+ }
+ /* ip4 interfaces, with the port commpoints that are in use */
+ if(outnet->ip4_ifs) {
+ int i, k;
+ for(i=0; i<outnet->num_ip4; i++) {
+ for(k=0; k<outnet->ip4_ifs[i].inuse; k++) {
+ struct port_comm* pc = outnet->ip4_ifs[i].
+ out[k];
+ comm_point_delete(pc->cp);
+ free(pc);
+ }
+ free(outnet->ip4_ifs[i].avail_ports);
+ free(outnet->ip4_ifs[i].out);
+ }
+ free(outnet->ip4_ifs);
+ }
+ /* ip6 interfaces, same as for ip4 */
+ if(outnet->ip6_ifs) {
+ int i, k;
+ for(i=0; i<outnet->num_ip6; i++) {
+ for(k=0; k<outnet->ip6_ifs[i].inuse; k++) {
+ struct port_comm* pc = outnet->ip6_ifs[i].
+ out[k];
+ comm_point_delete(pc->cp);
+ free(pc);
+ }
+ free(outnet->ip6_ifs[i].avail_ports);
+ free(outnet->ip6_ifs[i].out);
+ }
+ free(outnet->ip6_ifs);
+ }
+ /* tcp buffers; entries can be NULL if creation stopped halfway */
+ if(outnet->tcp_conns) {
+ size_t i;
+ for(i=0; i<outnet->num_tcp; i++)
+ if(outnet->tcp_conns[i]) {
+ comm_point_delete(outnet->tcp_conns[i]->c);
+ waiting_tcp_delete(outnet->tcp_conns[i]->query);
+ free(outnet->tcp_conns[i]);
+ }
+ free(outnet->tcp_conns);
+ }
+ /* tcp queries that still wait for a buffer */
+ if(outnet->tcp_wait_first) {
+ struct waiting_tcp* p = outnet->tcp_wait_first, *np;
+ while(p) {
+ np = p->next_waiting;
+ waiting_tcp_delete(p);
+ p = np;
+ }
+ }
+ /* udp queries that still wait for a port; deleted without outnet,
+ * so without unlink from the list or the tree */
+ if(outnet->udp_wait_first) {
+ struct pending* p = outnet->udp_wait_first, *np;
+ while(p) {
+ np = p->next_waiting;
+ pending_delete(NULL, p);
+ p = np;
+ }
+ }
+ free(outnet);
+}
+
+/**
+ * Delete a pending udp query: its timer, its packet and the structure.
+ * With an outnet, the entry is first unlinked from the udp waiting list
+ * (if it is on it) and removed from the pending tree.
+ * @param outnet: the outside network, or NULL to skip the unlinking.
+ * @param p: the entry to delete, NULL is ignored.
+ */
+void
+pending_delete(struct outside_network* outnet, struct pending* p)
+{
+	if(p == NULL)
+		return;
+	if(outnet && outnet->udp_wait_first &&
+		(p->next_waiting || p == outnet->udp_wait_last) ) {
+		/* delete from waiting list, if it is in the waiting list */
+		struct pending* before = NULL;
+		struct pending* cur;
+		for(cur = outnet->udp_wait_first; cur && cur != p;
+			cur = cur->next_waiting)
+			before = cur;
+		if(cur) {
+			log_assert(cur == p);
+			if(before == NULL)
+				outnet->udp_wait_first = p->next_waiting;
+			else
+				before->next_waiting = p->next_waiting;
+			if(outnet->udp_wait_last == p)
+				outnet->udp_wait_last = before;
+		}
+	}
+	if(outnet)
+		(void)rbtree_delete(outnet->pending, p->node.key);
+	if(p->timer)
+		comm_timer_delete(p->timer);
+	free(p->pkt);
+	free(p);
+}
+
+/**
+ * Try to open a UDP socket for outgoing communication.
+ * The port is written into addr (in network byte order) and the socket is
+ * then made with create_udp_sock() for the family of addr.
+ * @param addr: socket address, its port field is overwritten.
+ * @param addrlen: length of address.
+ * @param port: port override for addr.
+ * @param inuse: if -1 is returned, this bool means the port was in use.
+ * @return fd or -1
+ */
+static int
+udp_sockport(struct sockaddr_storage* addr, socklen_t addrlen, int port,
+	int* inuse)
+{
+	int family, noproto;
+	in_port_t netport = (in_port_t)htons((uint16_t)port);
+	if(addr_is_ip6(addr, addrlen)) {
+		((struct sockaddr_in6*)addr)->sin6_port = netport;
+		family = AF_INET6;
+	} else {
+		((struct sockaddr_in*)addr)->sin_port = netport;
+		family = AF_INET;
+	}
+	/* noproto is only there to receive the result, it is not used */
+	return create_udp_sock(family, SOCK_DGRAM,
+		(struct sockaddr*)addr, addrlen, 1, inuse, &noproto,
+		0, 0, 0, NULL);
+}
+
+/** Select random ID.
+ * Draws a 16-bit value from ub_random(), stores it in pend->id, writes it
+ * into the packet with LDNS_ID_SET and inserts pend into outnet->pending.
+ * When the insert is refused, a new ID is drawn, for at most MAX_ID_RETRY
+ * attempts.
+ * @param outnet: outside network with the random state and pending tree.
+ * @param pend: pending entry that receives the ID and is put in the tree.
+ * @param packet: query packet whose ID field is overwritten.
+ * @return 1 when pend was inserted, 0 when no unique ID was found; in that
+ *	case pend is not in the tree and pend->id is set to 99999.
+ */
+static int
+select_id(struct outside_network* outnet, struct pending* pend,
+ sldns_buffer* packet)
+{
+ int id_tries = 0;
+ pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff;
+ LDNS_ID_SET(sldns_buffer_begin(packet), pend->id);
+
+ /* insert in tree */
+ pend->node.key = pend;
+ while(!rbtree_insert(outnet->pending, &pend->node)) {
+ /* change ID to avoid collision */
+ pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff;
+ LDNS_ID_SET(sldns_buffer_begin(packet), pend->id);
+ id_tries++;
+ if(id_tries == MAX_ID_RETRY) {
+ pend->id=99999; /* nonexistent ID: larger than the 16-bit mask allows */
+ log_err("failed to generate unique ID, drop msg");
+ return 0;
+ }
+ }
+ verbose(VERB_ALGO, "inserted new pending reply id=%4.4x", pend->id);
+ return 1;
+}
+
+/** Select random interface and port.
+ * Picks a random interface from ifs and a random index below its
+ * avail_total. An index below pif->inuse selects an already open port
+ * (pif->out[index]); otherwise the port avail_ports[index - inuse] is
+ * opened with udp_sockport(), a port_comm is taken from outnet->unused_fds,
+ * and the port is moved from the available array to the in-use array.
+ * A port that turns out to be in use is retried with a new random pick,
+ * for at most MAX_PORT_RETRY attempts.
+ * @param outnet: outside network (random state, unused_fds list).
+ * @param pend: pending entry; pend->pc is set to the chosen port_comm.
+ * @param num_if: number of entries in ifs.
+ * @param ifs: array of interfaces of the wanted address family.
+ * @return 1 on success (pend->pc->num_outstanding was incremented),
+ *	0 if there are no interfaces, on a socket error that is not
+ *	"port in use", or when the retries are exhausted.
+ */
+static int
+select_ifport(struct outside_network* outnet, struct pending* pend,
+ int num_if, struct port_if* ifs)
+{
+ int my_if, my_port, fd, portno, inuse, tries=0;
+ struct port_if* pif;
+ /* randomly select interface and port */
+ if(num_if == 0) {
+ verbose(VERB_QUERY, "Need to send query but have no "
+ "outgoing interfaces of that family");
+ return 0;
+ }
+ log_assert(outnet->unused_fds);
+ tries = 0;
+ while(1) {
+ my_if = ub_random_max(outnet->rnd, num_if);
+ pif = &ifs[my_if];
+ my_port = ub_random_max(outnet->rnd, pif->avail_total);
+ if(my_port < pif->inuse) {
+ /* port already open */
+ pend->pc = pif->out[my_port];
+ verbose(VERB_ALGO, "using UDP if=%d port=%d",
+ my_if, pend->pc->number);
+ break;
+ }
+ /* try to open new port, if fails, loop to try again */
+ log_assert(pif->inuse < pif->maxout);
+ portno = pif->avail_ports[my_port - pif->inuse];
+ fd = udp_sockport(&pif->addr, pif->addrlen, portno, &inuse);
+ if(fd == -1 && !inuse) {
+ /* nonrecoverable error making socket */
+ return 0;
+ }
+ if(fd != -1) {
+ verbose(VERB_ALGO, "opened UDP if=%d port=%d",
+ my_if, portno);
+ /* grab fd: pop the first port_comm off the unused list */
+ pend->pc = outnet->unused_fds;
+ outnet->unused_fds = pend->pc->next;
+
+ /* setup portcomm */
+ pend->pc->next = NULL;
+ pend->pc->number = portno;
+ pend->pc->pif = pif;
+ pend->pc->index = pif->inuse;
+ pend->pc->num_outstanding = 0;
+ comm_point_start_listening(pend->pc->cp, fd, -1);
+
+ /* grab port in interface: the chosen slot in avail_ports is
+ * overwritten with the last available port, then inuse grows */
+ pif->out[pif->inuse] = pend->pc;
+ pif->avail_ports[my_port - pif->inuse] =
+ pif->avail_ports[pif->avail_total-pif->inuse-1];
+ pif->inuse++;
+ break;
+ }
+ /* failed, already in use */
+ verbose(VERB_QUERY, "port %d in use, trying another", portno);
+ tries++;
+ if(tries == MAX_PORT_RETRY) {
+ log_err("failed to find an open port, drop msg");
+ return 0;
+ }
+ }
+ log_assert(pend->pc);
+ pend->pc->num_outstanding++;
+
+ return 1;
+}
+
+/**
+ * Give a pending UDP query a random ID and a random source interface and
+ * port, send it, and start its timeout timer.
+ * @param pend: pending entry; destination address and timer must be set.
+ * @param packet: query packet, its ID is overwritten.
+ * @param timeout: timeout in milliseconds for the timer.
+ * @return 0 on failure (no ID, no interface/port, or send failed), else 1.
+ */
+static int
+randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, int timeout)
+{
+	struct outside_network* outnet = pend->sq->outnet;
+	struct timeval tv;
+	struct port_if* ifs;
+	int num_if;
+
+	/* random query ID, also inserts pend into the pending tree */
+	if(!select_id(outnet, pend, packet))
+		return 0;
+
+	/* source interface and port, from the matching address family */
+	if(addr_is_ip6(&pend->addr, pend->addrlen)) {
+		num_if = outnet->num_ip6;
+		ifs = outnet->ip6_ifs;
+	} else {
+		num_if = outnet->num_ip4;
+		ifs = outnet->ip4_ifs;
+	}
+	if(!select_ifport(outnet, pend, num_if, ifs))
+		return 0;
+	log_assert(pend->pc && pend->pc->cp);
+
+	/* put it on the wire */
+	if(!comm_point_send_udp_msg(pend->pc->cp, packet,
+		(struct sockaddr*)&pend->addr, pend->addrlen)) {
+		portcomm_loweruse(outnet, pend->pc);
+		return 0;
+	}
+
+	/* the timer is set after sending, so that the system calls for it
+	 * do not add to the measured roundtrip */
+#ifndef S_SPLINT_S
+	tv.tv_sec = timeout/1000;
+	tv.tv_usec = (timeout%1000)*1000;
+#endif
+	comm_timer_set(pend->timer, &tv);
+
+#ifdef USE_DNSTAP
+	if(outnet->dtenv &&
+	   (outnet->dtenv->log_resolver_query_messages ||
+	    outnet->dtenv->log_forwarder_query_messages))
+		dt_msg_send_outside_query(outnet->dtenv, &pend->addr, comm_udp,
+			pend->sq->zone, pend->sq->zonelen, packet);
+#endif
+	return 1;
+}
+
+/**
+ * Create a pending UDP query for a serviced query and send it, or queue it
+ * on the UDP waiting list when no unused port_comm is available.
+ * @param sq: serviced query; supplies outnet and destination address.
+ * @param packet: query packet; copied when the query has to wait.
+ * @param timeout: timeout in milliseconds.
+ * @param cb: callback stored in the pending entry.
+ * @param cb_arg: user argument stored with the callback.
+ * @return the new pending entry, or NULL on allocation or send failure.
+ */
+struct pending*
+pending_udp_query(struct serviced_query* sq, struct sldns_buffer* packet,
+	int timeout, comm_point_callback_t* cb, void* cb_arg)
+{
+	struct outside_network* outnet = sq->outnet;
+	struct pending* pend = (struct pending*)calloc(1, sizeof(*pend));
+	if(!pend)
+		return NULL;
+	pend->outnet = outnet;
+	pend->sq = sq;
+	pend->addrlen = sq->addrlen;
+	memmove(&pend->addr, &sq->addr, sq->addrlen);
+	pend->cb = cb;
+	pend->cb_arg = cb_arg;
+	pend->node.key = pend;
+	pend->timer = comm_timer_create(outnet->base, pending_udp_timer_cb,
+		pend);
+	if(!pend->timer) {
+		free(pend);
+		return NULL;
+	}
+
+	if(outnet->unused_fds != NULL) {
+		/* a port can be opened: send right away */
+		if(randomize_and_send_udp(pend, packet, timeout))
+			return pend;
+		pending_delete(outnet, pend);
+		return NULL;
+	}
+
+	/* no unused fd, cannot create a new port (randomly) */
+	verbose(VERB_ALGO, "no fds available, udp query waiting");
+	pend->timeout = timeout;
+	pend->pkt_len = sldns_buffer_limit(packet);
+	pend->pkt = (uint8_t*)memdup(sldns_buffer_begin(packet),
+		pend->pkt_len);
+	if(!pend->pkt) {
+		comm_timer_delete(pend->timer);
+		free(pend);
+		return NULL;
+	}
+	/* append to the tail of the waiting list */
+	if(outnet->udp_wait_last)
+		outnet->udp_wait_last->next_waiting = pend;
+	else
+		outnet->udp_wait_first = pend;
+	outnet->udp_wait_last = pend;
+	return pend;
+}
+
+/**
+ * Timer callback for a TCP query that timed out.
+ * A query that is still waiting (it has a stored packet) is taken off the
+ * waiting list; a query that was in use has its connection closed and its
+ * pending_tcp returned to the free list. The user callback is then called
+ * with NETEVENT_TIMEOUT and a waiting query, if any, may use the buffer.
+ * @param arg: the struct waiting_tcp that timed out.
+ */
+void
+outnet_tcptimer(void* arg)
+{
+	struct waiting_tcp* w = (struct waiting_tcp*)arg;
+	struct outside_network* outnet = w->outnet;
+	comm_point_callback_t* cb;
+	void* cb_arg;
+	if(!w->pkt) {
+		/* it was in use; next_waiting holds the pending_tcp */
+		struct pending_tcp* conn = (struct pending_tcp*)w->next_waiting;
+		comm_point_close(conn->c);
+		conn->query = NULL;
+		conn->next_free = outnet->tcp_free;
+		outnet->tcp_free = conn;
+	} else {
+		/* it is on the waiting list */
+		waiting_list_remove(outnet, w);
+	}
+	/* save the callback, w is deleted before it is called */
+	cb = w->cb;
+	cb_arg = w->cb_arg;
+	waiting_tcp_delete(w);
+	fptr_ok(fptr_whitelist_pending_tcp(cb));
+	(void)(*cb)(NULL, cb_arg, NETEVENT_TIMEOUT, NULL);
+	use_free_buffer(outnet);
+}
+
+/**
+ * Start a TCP query for a serviced query.
+ * If a pending_tcp is free the query is sent on it at once; otherwise the
+ * packet is copied behind the waiting_tcp structure (same allocation) and
+ * the query is appended to the TCP waiting list. A random ID is written
+ * into the packet and the timeout timer is started in both cases.
+ * @param sq: serviced query; supplies outnet, address and ssl setting.
+ * @param packet: query packet; its ID is overwritten.
+ * @param timeout: timeout, stored in the tv_sec field of the timer value.
+ * @param callback: callback stored in the waiting_tcp.
+ * @param callback_arg: user argument stored with the callback.
+ * @return the waiting_tcp entry, or NULL on failure.
+ */
+struct waiting_tcp*
+pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet,
+	int timeout, comm_point_callback_t* callback, void* callback_arg)
+{
+	struct outside_network* outnet = sq->outnet;
+	struct pending_tcp* pend = outnet->tcp_free;
+	size_t pktlen = sldns_buffer_limit(packet);
+	struct waiting_tcp* w;
+	struct timeval tv;
+	uint16_t id;
+	/* if no buffer is free allocate space to store query */
+	w = (struct waiting_tcp*)malloc(sizeof(struct waiting_tcp)
+		+ (pend?0:pktlen));
+	if(!w)
+		return NULL;
+	w->timer = comm_timer_create(outnet->base, outnet_tcptimer, w);
+	if(!w->timer) {
+		free(w);
+		return NULL;
+	}
+	w->pkt = NULL;
+	w->pkt_len = 0;
+	id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff;
+	LDNS_ID_SET(sldns_buffer_begin(packet), id);
+	memcpy(&w->addr, &sq->addr, sq->addrlen);
+	w->addrlen = sq->addrlen;
+	w->outnet = outnet;
+	w->cb = callback;
+	w->cb_arg = callback_arg;
+	w->ssl_upstream = sq->ssl_upstream;
+#ifndef S_SPLINT_S
+	tv.tv_sec = timeout;
+	tv.tv_usec = 0;
+#endif
+	comm_timer_set(w->timer, &tv);
+	if(!pend) {
+		/* queue up; the packet copy lives right after the struct */
+		w->pkt = (uint8_t*)w + sizeof(struct waiting_tcp);
+		w->pkt_len = pktlen;
+		memmove(w->pkt, sldns_buffer_begin(packet), w->pkt_len);
+		w->next_waiting = NULL;
+		if(outnet->tcp_wait_last)
+			outnet->tcp_wait_last->next_waiting = w;
+		else
+			outnet->tcp_wait_first = w;
+		outnet->tcp_wait_last = w;
+		return w;
+	}
+	/* we have a buffer available right now */
+	if(!outnet_tcp_take_into_use(w, sldns_buffer_begin(packet),
+		pktlen)) {
+		waiting_tcp_delete(w);
+		return NULL;
+	}
+#ifdef USE_DNSTAP
+	if(outnet->dtenv &&
+	   (outnet->dtenv->log_resolver_query_messages ||
+	    outnet->dtenv->log_forwarder_query_messages))
+		dt_msg_send_outside_query(outnet->dtenv, &sq->addr,
+			comm_tcp, sq->zone, sq->zonelen, packet);
+#endif
+	return w;
+}
+
+/** create query for serviced queries.
+ * Clears buff and writes a query message into it that starts at the flags
+ * field: the 2-byte ID is left out. One question is written (qdcount 1,
+ * other counts 0) with the given name, type and class, and the buffer is
+ * flipped afterwards.
+ * @param buff: buffer that receives the ID-less query.
+ * @param qname: query name, qnamelen bytes are copied as-is.
+ * @param qnamelen: length of qname.
+ * @param qtype: query type.
+ * @param qclass: query class.
+ * @param flags: value for the flags field of the header.
+ */
+static void
+serviced_gen_query(sldns_buffer* buff, uint8_t* qname, size_t qnamelen,
+ uint16_t qtype, uint16_t qclass, uint16_t flags)
+{
+ sldns_buffer_clear(buff);
+ /* skip id */
+ sldns_buffer_write_u16(buff, flags);
+ sldns_buffer_write_u16(buff, 1); /* qdcount */
+ sldns_buffer_write_u16(buff, 0); /* ancount */
+ sldns_buffer_write_u16(buff, 0); /* nscount */
+ sldns_buffer_write_u16(buff, 0); /* arcount */
+ sldns_buffer_write(buff, qname, qnamelen);
+ sldns_buffer_write_u16(buff, qtype);
+ sldns_buffer_write_u16(buff, qclass);
+ sldns_buffer_flip(buff);
+}
+
+/** lookup serviced query in serviced query rbtree.
+ * Fills a key on the stack with the query bytes from buff, the dnssec
+ * value, the address and the outnet, and searches outnet->serviced with it.
+ * Only those key fields are initialised; the comparison function of the
+ * tree is presumably limited to them (it is not visible here).
+ * @param outnet: outside network that holds the serviced tree.
+ * @param buff: buffer with the query as made by serviced_gen_query.
+ * @param dnssec: dnssec value to match.
+ * @param addr: destination address to match.
+ * @param addrlen: length of addr.
+ * @return the result of rbtree_search cast to a serviced query; NULL is
+ *	treated as "not found" by the caller.
+ */
+static struct serviced_query*
+lookup_serviced(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
+ struct sockaddr_storage* addr, socklen_t addrlen)
+{
+ struct serviced_query key;
+ key.node.key = &key;
+ key.qbuf = sldns_buffer_begin(buff); /* borrowed pointer, not a copy */
+ key.qbuflen = sldns_buffer_limit(buff);
+ key.dnssec = dnssec;
+ memcpy(&key.addr, addr, addrlen);
+ key.addrlen = addrlen;
+ key.outnet = outnet;
+ return (struct serviced_query*)rbtree_search(outnet->serviced, &key);
+}
+
+/** Create new serviced entry.
+ * Allocates a serviced query, copies the query bytes from buff and the
+ * zone name, stores the options and address, sets the status to
+ * serviced_initial, and inserts the entry into outnet->serviced.
+ * @param outnet: outside network that will own the entry.
+ * @param buff: buffer with the query; its contents are duplicated.
+ * @param dnssec: stored in sq->dnssec.
+ * @param want_dnssec: stored in sq->want_dnssec.
+ * @param nocaps: stored in sq->nocaps.
+ * @param tcp_upstream: stored in sq->tcp_upstream.
+ * @param ssl_upstream: stored in sq->ssl_upstream.
+ * @param addr: destination address, copied.
+ * @param addrlen: length of addr.
+ * @param zone: zone name, duplicated.
+ * @param zonelen: length of zone.
+ * @param qtype: stored in sq->qtype.
+ * @return new entry, or NULL on allocation failure (nothing is leaked).
+ */
+static struct serviced_query*
+serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
+ int want_dnssec, int nocaps, int tcp_upstream, int ssl_upstream,
+ struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone,
+ size_t zonelen, int qtype)
+{
+ struct serviced_query* sq = (struct serviced_query*)malloc(sizeof(*sq));
+#ifdef UNBOUND_DEBUG
+ rbnode_t* ins;
+#endif
+ if(!sq)
+ return NULL;
+ sq->node.key = sq;
+ sq->qbuf = memdup(sldns_buffer_begin(buff), sldns_buffer_limit(buff));
+ if(!sq->qbuf) {
+ free(sq);
+ return NULL;
+ }
+ sq->qbuflen = sldns_buffer_limit(buff);
+ sq->zone = memdup(zone, zonelen);
+ if(!sq->zone) {
+ free(sq->qbuf);
+ free(sq);
+ return NULL;
+ }
+ sq->zonelen = zonelen;
+ sq->qtype = qtype;
+ sq->dnssec = dnssec;
+ sq->want_dnssec = want_dnssec;
+ sq->nocaps = nocaps;
+ sq->tcp_upstream = tcp_upstream;
+ sq->ssl_upstream = ssl_upstream;
+ memcpy(&sq->addr, addr, addrlen);
+ sq->addrlen = addrlen;
+ sq->outnet = outnet;
+ sq->cblist = NULL;
+ sq->pending = NULL;
+ sq->status = serviced_initial;
+ sq->retry = 0;
+ sq->to_be_deleted = 0;
+#ifdef UNBOUND_DEBUG
+ ins =
+#else
+ (void)
+#endif
+ rbtree_insert(outnet->serviced, &sq->node); /* one statement: the preprocessor picks "ins =" or "(void)" as its prefix */
+ log_assert(ins != NULL); /* must not be already present */
+ return sq;
+}
+
+/**
+ * Unlink a waiting tcp entry from the outnet TCP waiting list.
+ * The entry itself is not freed. Nothing happens if it is not on the list.
+ * @param outnet: outside network with the waiting list.
+ * @param w: entry to unlink.
+ */
+static void
+waiting_list_remove(struct outside_network* outnet, struct waiting_tcp* w)
+{
+	struct waiting_tcp* before = NULL;
+	struct waiting_tcp* cur = outnet->tcp_wait_first;
+	/* find w, remembering its predecessor */
+	while(cur && cur != w) {
+		before = cur;
+		cur = cur->next_waiting;
+	}
+	if(!cur)
+		return;
+	/* unlink w, fixing up head and tail */
+	if(before)
+		before->next_waiting = w->next_waiting;
+	else
+		outnet->tcp_wait_first = w->next_waiting;
+	if(outnet->tcp_wait_last == w)
+		outnet->tcp_wait_last = before;
+}
+
+/** cleanup serviced query entry.
+ * Releases the outstanding network query of sq, if there is one, and then
+ * frees the entry through serviced_node_del. The status decides whether
+ * sq->pending is a UDP pending entry or a TCP waiting_tcp entry.
+ * The entry is not removed from the serviced tree here.
+ * @param sq: serviced query to delete.
+ */
+static void
+serviced_delete(struct serviced_query* sq)
+{
+ if(sq->pending) {
+ /* clear up the pending query */
+ if(sq->status == serviced_query_UDP_EDNS ||
+ sq->status == serviced_query_UDP ||
+ sq->status == serviced_query_PROBE_EDNS ||
+ sq->status == serviced_query_UDP_EDNS_FRAG ||
+ sq->status == serviced_query_UDP_EDNS_fallback) {
+ struct pending* p = (struct pending*)sq->pending;
+ if(p->pc)
+ portcomm_loweruse(sq->outnet, p->pc);
+ pending_delete(sq->outnet, p);
+ /* this call can cause reentrant calls back into the
+ * mesh */
+ outnet_send_wait_udp(sq->outnet);
+ } else {
+ struct waiting_tcp* p = (struct waiting_tcp*)
+ sq->pending;
+ if(p->pkt == NULL) { /* no stored packet: the query is in use on a connection */
+ decomission_pending_tcp(sq->outnet,
+ (struct pending_tcp*)p->next_waiting);
+ } else { /* stored packet: the query is still on the waiting list */
+ waiting_list_remove(sq->outnet, p);
+ waiting_tcp_delete(p);
+ }
+ }
+ }
+ /* does not delete from tree, caller has to do that */
+ serviced_node_del(&sq->node, NULL);
+}
+
+/** perturb a dname capitalization randomly.
+ * Walks the labels of the query name that starts at offset 10 in qbuf
+ * (the ID-less header is 10 bytes) and sets every alphabetic character to
+ * upper or lower case depending on one random bit. Random bits are taken
+ * 30 at a time from ub_random().
+ * @param rnd: random state.
+ * @param qbuf: query without ID; the name in it is modified in place.
+ * @param len: length of qbuf, only used in the assertion.
+ */
+static void
+serviced_perturb_qname(struct ub_randstate* rnd, uint8_t* qbuf, size_t len)
+{
+ uint8_t lablen;
+ uint8_t* d = qbuf + 10;
+ long int random = 0;
+ int bits = 0;
+ log_assert(len >= 10 + 5 /* offset qname, root, qtype, qclass */);
+ lablen = *d++;
+ while(lablen) { /* a zero length byte is the root label, the end of the name */
+ while(lablen--) {
+ /* only perturb A-Z, a-z */
+ if(isalpha((int)*d)) {
+ /* get a random bit */
+ if(bits == 0) {
+ random = ub_random(rnd);
+ bits = 30;
+ }
+ if(random & 0x1) {
+ *d = (uint8_t)toupper((int)*d);
+ } else {
+ *d = (uint8_t)tolower((int)*d);
+ }
+ random >>= 1;
+ bits--;
+ }
+ d++;
+ }
+ lablen = *d++;
+ }
+ if(verbosity >= VERB_ALGO) {
+ char buf[LDNS_MAX_DOMAINLEN+1];
+ dname_str(qbuf+10, buf);
+ verbose(VERB_ALGO, "qname perturbed to %s", buf);
+ }
+}
+
+/** put serviced query into a buffer.
+ * Writes a 2-byte ID placeholder followed by the stored query bytes into
+ * buff. Before that, the capitalisation of the stored query name is
+ * randomised when use_caps_for_id is on and the query does not have nocaps
+ * set. With with_edns, an EDNS record is attached; its advertised UDP size
+ * is lowered to the IP4/IP6 fragmentation size in the UDP_EDNS_FRAG state.
+ * @param sq: serviced query; sq->qbuf may be modified (capitalisation).
+ * @param buff: buffer that receives the packet.
+ * @param with_edns: if true, an EDNS record is attached.
+ */
+static void
+serviced_encode(struct serviced_query* sq, sldns_buffer* buff, int with_edns)
+{
+ /* if we are using 0x20 bits for ID randomness, perturb them */
+ if(sq->outnet->use_caps_for_id && !sq->nocaps) {
+ serviced_perturb_qname(sq->outnet->rnd, sq->qbuf, sq->qbuflen);
+ }
+ /* generate query */
+ sldns_buffer_clear(buff);
+ sldns_buffer_write_u16(buff, 0); /* id placeholder */
+ sldns_buffer_write(buff, sq->qbuf, sq->qbuflen);
+ sldns_buffer_flip(buff);
+ if(with_edns) {
+ /* add edns section */
+ struct edns_data edns;
+ edns.edns_present = 1;
+ edns.ext_rcode = 0;
+ edns.edns_version = EDNS_ADVERTISED_VERSION;
+ if(sq->status == serviced_query_UDP_EDNS_FRAG) {
+ if(addr_is_ip6(&sq->addr, sq->addrlen)) {
+ if(EDNS_FRAG_SIZE_IP6 < EDNS_ADVERTISED_SIZE)
+ edns.udp_size = EDNS_FRAG_SIZE_IP6;
+ else edns.udp_size = EDNS_ADVERTISED_SIZE;
+ } else {
+ if(EDNS_FRAG_SIZE_IP4 < EDNS_ADVERTISED_SIZE)
+ edns.udp_size = EDNS_FRAG_SIZE_IP4;
+ else edns.udp_size = EDNS_ADVERTISED_SIZE;
+ }
+ } else {
+ edns.udp_size = EDNS_ADVERTISED_SIZE;
+ }
+ edns.bits = 0;
+ if(sq->dnssec & EDNS_DO)
+ edns.bits = EDNS_DO;
+ if(sq->dnssec & BIT_CD) /* note: the CD flag is only set on this with_edns path */
+ LDNS_CD_SET(sldns_buffer_begin(buff));
+ attach_edns_record(buff, &edns);
+ }
+}
+
+/**
+ * Perform serviced query UDP sending operation.
+ * Sends UDP with EDNS, unless infra host marked non EDNS.
+ * The host is looked up with infra_host() for its EDNS status (vs) and
+ * timeout (rtt). In the initial state the status becomes PROBE_EDNS (with
+ * a fixed timeout of 1000) when the EDNS lameness is not known and the rtt
+ * is above 5000 and below 10001, UDP_EDNS when vs is not -1, and plain UDP
+ * otherwise. The packet is encoded with EDNS only in the UDP_EDNS and
+ * UDP_EDNS_FRAG states and handed to pending_udp_query().
+ * @param sq: query to send.
+ * @param buff: buffer scratch space.
+ * @return 0 on error.
+ */
+static int
+serviced_udp_send(struct serviced_query* sq, sldns_buffer* buff)
+{
+ int rtt, vs;
+ uint8_t edns_lame_known;
+ time_t now = *sq->outnet->now_secs;
+
+ if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone,
+ sq->zonelen, now, &vs, &edns_lame_known, &rtt))
+ return 0;
+ sq->last_rtt = rtt;
+ verbose(VERB_ALGO, "EDNS lookup known=%d vs=%d", edns_lame_known, vs);
+ if(sq->status == serviced_initial) {
+ if(edns_lame_known == 0 && rtt > 5000 && rtt < 10001) {
+ /* perform EDNS lame probe - check if server is
+ * EDNS lame (EDNS queries to it are dropped) */
+ verbose(VERB_ALGO, "serviced query: send probe to see "
+ " if use of EDNS causes timeouts");
+ /* even 700 msec may be too small */
+ rtt = 1000;
+ sq->status = serviced_query_PROBE_EDNS;
+ } else if(vs != -1) {
+ sq->status = serviced_query_UDP_EDNS;
+ } else {
+ sq->status = serviced_query_UDP;
+ }
+ }
+ serviced_encode(sq, buff, (sq->status == serviced_query_UDP_EDNS) ||
+ (sq->status == serviced_query_UDP_EDNS_FRAG));
+ sq->last_sent_time = *sq->outnet->now_tv;
+ sq->edns_lame_known = (int)edns_lame_known;
+ verbose(VERB_ALGO, "serviced query UDP timeout=%d msec", rtt);
+ sq->pending = pending_udp_query(sq, buff, rtt,
+ serviced_udp_callback, sq);
+ if(!sq->pending)
+ return 0;
+ return 1;
+}
+
+/**
+ * Check that perturbed qname is identical.
+ * Compares the query name in a reply, label by label and byte for byte,
+ * with the name that was sent. The name in the reply may use compression
+ * pointers, which are followed for at most MAX_COMPRESS_PTRS jumps.
+ * The reply comes from the network, so every read from it is checked
+ * against the packet limit first; a malformed or truncated name counts as
+ * a mismatch.
+ * @param pkt: reply packet, with the query name at offset 12.
+ * @param qbuf: query that was sent, without ID (name at offset 10).
+ * @param qbuflen: length of qbuf.
+ * @return 1 if the names are identical, 0 otherwise.
+ */
+static int
+serviced_check_qname(sldns_buffer* pkt, uint8_t* qbuf, size_t qbuflen)
+{
+	uint8_t* pkt_begin = sldns_buffer_begin(pkt);
+	size_t pkt_limit = sldns_buffer_limit(pkt);
+	uint8_t* pkt_end = pkt_begin + pkt_limit;
+	uint8_t* d1;
+	uint8_t* d2 = qbuf+10;
+	uint8_t len1, len2;
+	int count = 0;
+	/* check the size before the first byte of the name is read */
+	if(pkt_limit < 12+1+4) /* packet too small for qname */
+		return 0;
+	log_assert(qbuflen >= 15 /* 10 header, root, type, class */);
+	d1 = pkt_begin + 12;
+	len1 = *d1++;
+	len2 = *d2++;
+	while(len1 != 0 || len2 != 0) {
+		if(LABEL_IS_PTR(len1)) {
+			size_t target;
+			/* the second byte of the pointer must be readable */
+			if(d1 >= pkt_end)
+				return 0;
+			target = (size_t)PTR_OFFSET(len1, *d1);
+			/* the pointer must point inside the packet */
+			if(target >= pkt_limit)
+				return 0;
+			d1 = pkt_begin + target;
+			len1 = *d1++;
+			if(count++ > MAX_COMPRESS_PTRS)
+				return 0;
+			continue;
+		}
+		if(d2 > qbuf+qbuflen)
+			return 0;
+		if(len1 != len2)
+			return 0;
+		if(len1 > LDNS_MAX_LABELLEN)
+			return 0;
+		/* the label and the length byte after it must be readable */
+		if((size_t)(pkt_end - d1) <= (size_t)len1)
+			return 0;
+		log_assert(len1 <= LDNS_MAX_LABELLEN);
+		log_assert(len2 <= LDNS_MAX_LABELLEN);
+		log_assert(len1 == len2 && len1 != 0);
+		/* compare the labels - bitwise identical */
+		if(memcmp(d1, d2, len1) != 0)
+			return 0;
+		d1 += len1;
+		d2 += len2;
+		len1 = *d1++;
+		len2 = *d2++;
+	}
+	return 1;
+}
+
+/**
+ * Call the callbacks for a serviced query and delete the query.
+ * The query is removed from the serviced tree first and marked
+ * to_be_deleted, so that callbacks can deregister themselves or create an
+ * identical new query. With use_caps_for_id, the qname of a reply is
+ * verified: a NOERROR or NXDOMAIN reply without a question section is
+ * turned into NETEVENT_CLOSED, a mismatching qname into NETEVENT_CAPSFAIL.
+ * When there is more than one callback, the reply is backed up and
+ * restored into c->buffer before every callback, since a callback may
+ * overwrite the buffer.
+ * @param sq: serviced query; freed by this function.
+ * @param error: NETEVENT_ code to pass to the callbacks.
+ * @param c: comm point with the reply in c->buffer, or NULL.
+ * @param rep: reply info to pass to the callbacks.
+ */
+static void
+serviced_callbacks(struct serviced_query* sq, int error, struct comm_point* c,
+	struct comm_reply* rep)
+{
+	struct service_callback* p;
+	int dobackup = (sq->cblist && sq->cblist->next); /* >1 cb*/
+	uint8_t *backup_p = NULL;
+	size_t backlen = 0;
+#ifdef UNBOUND_DEBUG
+	rbnode_t* rem =
+#else
+	(void)
+#endif
+	/* remove from tree, and schedule for deletion, so that callbacks
+	 * can safely deregister themselves and even create new serviced
+	 * queries that are identical to this one. */
+	rbtree_delete(sq->outnet->serviced, sq);
+	log_assert(rem); /* should have been present */
+	sq->to_be_deleted = 1;
+	verbose(VERB_ALGO, "svcd callbacks start");
+	if(sq->outnet->use_caps_for_id && error == NETEVENT_NOERROR && c) {
+		/* noerror and nxdomain must have a qname in reply */
+		if(sldns_buffer_read_u16_at(c->buffer, 4) == 0 &&
+			(LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
+				== LDNS_RCODE_NOERROR ||
+			 LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
+				== LDNS_RCODE_NXDOMAIN)) {
+			verbose(VERB_DETAIL, "no qname in reply to check 0x20ID");
+			log_addr(VERB_DETAIL, "from server",
+				&sq->addr, sq->addrlen);
+			log_buf(VERB_DETAIL, "for packet", c->buffer);
+			error = NETEVENT_CLOSED;
+			c = NULL;
+		} else if(sldns_buffer_read_u16_at(c->buffer, 4) > 0 &&
+			!serviced_check_qname(c->buffer, sq->qbuf,
+			sq->qbuflen)) {
+			verbose(VERB_DETAIL, "wrong 0x20-ID in reply qname");
+			log_addr(VERB_DETAIL, "from server",
+				&sq->addr, sq->addrlen);
+			log_buf(VERB_DETAIL, "for packet", c->buffer);
+			error = NETEVENT_CAPSFAIL;
+			/* and cleanup too */
+			pkt_dname_tolower(c->buffer,
+				sldns_buffer_at(c->buffer, 12));
+		} else {
+			verbose(VERB_ALGO, "good 0x20-ID in reply qname");
+			/* cleanup caps, prettier cache contents. */
+			pkt_dname_tolower(c->buffer,
+				sldns_buffer_at(c->buffer, 12));
+		}
+	}
+	if(dobackup && c) {
+		/* make a backup of the query, since the querystate processing
+		 * may send outgoing queries that overwrite the buffer.
+		 * use secondary buffer to store the query.
+		 * This is a data copy, but faster than packet to server */
+		backlen = sldns_buffer_limit(c->buffer);
+		backup_p = memdup(sldns_buffer_begin(c->buffer), backlen);
+		if(!backup_p) {
+			log_err("malloc failure in serviced query callbacks");
+			error = NETEVENT_CLOSED;
+			c = NULL;
+		} else {
+			/* account for the backup only when it exists, so that
+			 * the overhead is always reset to 0 further down */
+			sq->outnet->svcd_overhead = backlen;
+		}
+	}
+	/* test the actual sq->cblist, because the next elem could be deleted*/
+	while((p=sq->cblist) != NULL) {
+		sq->cblist = p->next; /* remove this element */
+		if(dobackup && c) {
+			sldns_buffer_clear(c->buffer);
+			sldns_buffer_write(c->buffer, backup_p, backlen);
+			sldns_buffer_flip(c->buffer);
+		}
+		fptr_ok(fptr_whitelist_serviced_query(p->cb));
+		(void)(*p->cb)(c, p->cb_arg, error, rep);
+		free(p);
+	}
+	if(backup_p) {
+		free(backup_p);
+		sq->outnet->svcd_overhead = 0;
+	}
+	verbose(VERB_ALGO, "svcd callbacks end");
+	log_assert(sq->cblist == NULL);
+	serviced_delete(sq);
+}
+
+/**
+ * Callback for a TCP reply, timeout or error of a serviced query.
+ * A FORMERR or NOTIMPL reply to an EDNS query starts a retry without EDNS.
+ * When that retry gives NOERROR, NXDOMAIN or YXDOMAIN, the host is noted
+ * as not supporting EDNS (unless DNSSEC is wanted). For tcp or ssl
+ * upstreams the measured roundtrip time is stored in the infra cache.
+ * Finally the callbacks of the serviced query are called.
+ * @param c: comm point with the reply in c->buffer; NULL when called from
+ *	the TCP timeout timer.
+ * @param arg: the serviced query.
+ * @param error: NETEVENT_ code.
+ * @param rep: reply info, may be NULL; a local one is used in that case.
+ * @return 0 always.
+ */
+int
+serviced_tcp_callback(struct comm_point* c, void* arg, int error,
+	struct comm_reply* rep)
+{
+	struct serviced_query* sq = (struct serviced_query*)arg;
+	struct comm_reply r2;
+	sq->pending = NULL; /* removed after this callback */
+	if(error != NETEVENT_NOERROR)
+		log_addr(VERB_QUERY, "tcp error for address",
+			&sq->addr, sq->addrlen);
+	if(error==NETEVENT_NOERROR)
+		infra_update_tcp_works(sq->outnet->infra, &sq->addr,
+			sq->addrlen, sq->zone, sq->zonelen);
+#ifdef USE_DNSTAP
+	/* only log actual responses; on timeout c is NULL and there is
+	 * no response packet to log */
+	if(error==NETEVENT_NOERROR && sq->outnet->dtenv &&
+	   (sq->outnet->dtenv->log_resolver_response_messages ||
+	    sq->outnet->dtenv->log_forwarder_response_messages))
+		dt_msg_send_outside_response(sq->outnet->dtenv, &sq->addr,
+		c->type, sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen,
+		&sq->last_sent_time, sq->outnet->now_tv, c->buffer);
+#endif
+	if(error==NETEVENT_NOERROR && sq->status == serviced_query_TCP_EDNS &&
+		(LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) ==
+		LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(sldns_buffer_begin(
+		c->buffer)) == LDNS_RCODE_NOTIMPL) ) {
+		/* attempt to fallback to nonEDNS */
+		sq->status = serviced_query_TCP_EDNS_fallback;
+		serviced_tcp_initiate(sq, c->buffer);
+		return 0;
+	} else if(error==NETEVENT_NOERROR &&
+		sq->status == serviced_query_TCP_EDNS_fallback &&
+			(LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) ==
+			LDNS_RCODE_NOERROR || LDNS_RCODE_WIRE(
+			sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NXDOMAIN
+			|| LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
+			== LDNS_RCODE_YXDOMAIN)) {
+		/* the fallback produced a result that looks promising, note
+		 * that this server should be approached without EDNS */
+		/* only store noEDNS in cache if domain is noDNSSEC */
+		if(!sq->want_dnssec)
+		  if(!infra_edns_update(sq->outnet->infra, &sq->addr,
+			sq->addrlen, sq->zone, sq->zonelen, -1,
+			*sq->outnet->now_secs))
+			log_err("Out of memory caching no edns for host");
+		sq->status = serviced_query_TCP;
+	}
+	if(sq->tcp_upstream || sq->ssl_upstream) {
+	    struct timeval now = *sq->outnet->now_tv;
+	    if(now.tv_sec > sq->last_sent_time.tv_sec ||
+		(now.tv_sec == sq->last_sent_time.tv_sec &&
+		now.tv_usec > sq->last_sent_time.tv_usec)) {
+		/* convert from microseconds to milliseconds */
+		int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000
+		  + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000;
+		verbose(VERB_ALGO, "measured TCP-time at %d msec", roundtime);
+		log_assert(roundtime >= 0);
+		/* only store if less then AUTH_TIMEOUT seconds, it could be
+		 * huge due to system-hibernated and we woke up */
+		if(roundtime < TCP_AUTH_QUERY_TIMEOUT*1000) {
+		    if(!infra_rtt_update(sq->outnet->infra, &sq->addr,
+			sq->addrlen, sq->zone, sq->zonelen, sq->qtype,
+			roundtime, sq->last_rtt, (time_t)now.tv_sec))
+			log_err("out of memory noting rtt.");
+		}
+	    }
+	}
+	/* insert address into reply info */
+	if(!rep) {
+		/* create one if there isn't (on errors) */
+		rep = &r2;
+		r2.c = c;
+	}
+	memcpy(&rep->addr, &sq->addr, sq->addrlen);
+	rep->addrlen = sq->addrlen;
+	serviced_callbacks(sq, error, c, rep);
+	return 0;
+}
+
+/**
+ * (Re)start a serviced query over TCP, in the TCP state that is already
+ * set in sq->status. If the query cannot be started, the callbacks are
+ * called with NETEVENT_CLOSED, which also deletes the serviced query.
+ * @param sq: serviced query.
+ * @param buff: buffer scratch space to encode the packet in.
+ */
+static void
+serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff)
+{
+	int with_edns = (sq->status == serviced_query_TCP_EDNS);
+	verbose(VERB_ALGO, "initiate TCP query %s", with_edns?"EDNS":"");
+	serviced_encode(sq, buff, with_edns);
+	sq->last_sent_time = *sq->outnet->now_tv;
+	sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT,
+		serviced_tcp_callback, sq);
+	if(sq->pending)
+		return;
+	/* delete from tree so that a retry by above layer does not
+	 * clash with this entry */
+	log_err("serviced_tcp_initiate: failed to send tcp query");
+	serviced_callbacks(sq, NETEVENT_CLOSED, NULL, NULL);
+}
+
+/** Send serviced query over TCP return false on initial failure.
+ * The host is looked up with infra_host(); the status becomes TCP_EDNS
+ * when the returned EDNS status (vs) is not -1 and TCP otherwise. The
+ * packet is then encoded and handed to pending_tcp_query(). Unlike
+ * serviced_tcp_initiate, a failure is only reported through the return
+ * value; no callbacks are called here.
+ * @param sq: serviced query.
+ * @param buff: buffer scratch space.
+ * @return 0 if the infra lookup or starting the TCP query failed.
+ */
+static int
+serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff)
+{
+ int vs, rtt;
+ uint8_t edns_lame_known;
+ if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone,
+ sq->zonelen, *sq->outnet->now_secs, &vs, &edns_lame_known,
+ &rtt))
+ return 0;
+ if(vs != -1)
+ sq->status = serviced_query_TCP_EDNS;
+ else sq->status = serviced_query_TCP;
+ serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS);
+ sq->last_sent_time = *sq->outnet->now_tv;
+ sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT,
+ serviced_tcp_callback, sq);
+ return sq->pending != NULL;
+}
+
+/**
+ * Callback for a UDP reply, timeout or error of a serviced query.
+ * On timeout: a failed EDNS probe goes back to the EDNS state; an EDNS
+ * query with last_rtt below 5000 is retried once with a smaller EDNS size;
+ * otherwise the query is retried until OUTBOUND_UDP_RETRY is reached.
+ * After that, TCP is tried when the backed-off timeout has reached
+ * RTT_MAX_TIMEOUT, else the callbacks are called with NETEVENT_TIMEOUT.
+ * Other errors are passed to the callbacks.
+ * On a reply: FORMERR or NOTIMPL to an EDNS query starts a retry without
+ * EDNS; the EDNS status of the host and the measured roundtrip time are
+ * stored in the infra cache; a truncated reply starts a TCP query; any
+ * other reply is passed to the callbacks.
+ * @param c: comm point; c->buffer holds the reply, and is used as scratch
+ *	space for retries.
+ * @param arg: the serviced query.
+ * @param error: NETEVENT_ code.
+ * @param rep: reply info, passed on to the callbacks.
+ * @return 0 always.
+ */
+int
+serviced_udp_callback(struct comm_point* c, void* arg, int error,
+	struct comm_reply* rep)
+{
+	struct serviced_query* sq = (struct serviced_query*)arg;
+	struct outside_network* outnet = sq->outnet;
+	struct timeval now = *sq->outnet->now_tv;
+	int fallback_tcp = 0;
+
+	sq->pending = NULL; /* removed after callback */
+	if(error == NETEVENT_TIMEOUT) {
+		int rto = 0;
+		if(sq->status == serviced_query_PROBE_EDNS) {
+			/* non-EDNS probe failed; we do not know its status,
+			 * keep trying with EDNS, timeout may not be caused
+			 * by EDNS. */
+			sq->status = serviced_query_UDP_EDNS;
+		}
+		if(sq->status == serviced_query_UDP_EDNS && sq->last_rtt < 5000) {
+			/* fallback to 1480/1280 */
+			sq->status = serviced_query_UDP_EDNS_FRAG;
+			log_name_addr(VERB_ALGO, "try edns1xx0", sq->qbuf+10,
+				&sq->addr, sq->addrlen);
+			if(!serviced_udp_send(sq, c->buffer)) {
+				serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
+			}
+			return 0;
+		}
+		if(sq->status == serviced_query_UDP_EDNS_FRAG) {
+			/* fragmentation size did not fix it */
+			sq->status = serviced_query_UDP_EDNS;
+		}
+		sq->retry++;
+		if(!(rto=infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
+			sq->zone, sq->zonelen, sq->qtype, -1, sq->last_rtt,
+			(time_t)now.tv_sec)))
+			log_err("out of memory in UDP exponential backoff");
+		if(sq->retry < OUTBOUND_UDP_RETRY) {
+			log_name_addr(VERB_ALGO, "retry query", sq->qbuf+10,
+				&sq->addr, sq->addrlen);
+			if(!serviced_udp_send(sq, c->buffer)) {
+				serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
+			}
+			return 0;
+		}
+		if(rto >= RTT_MAX_TIMEOUT) {
+			fallback_tcp = 1;
+			/* UDP does not work, fallback to TCP below */
+		} else {
+			serviced_callbacks(sq, NETEVENT_TIMEOUT, c, rep);
+			return 0;
+		}
+	} else if(error != NETEVENT_NOERROR) {
+		/* udp returns error (due to no ID or interface available) */
+		serviced_callbacks(sq, error, c, rep);
+		return 0;
+	}
+#ifdef USE_DNSTAP
+	/* only log actual responses; on the timeout path that falls back
+	 * to TCP, the buffer does not hold a response to this query */
+	if(error == NETEVENT_NOERROR && outnet->dtenv &&
+	   (outnet->dtenv->log_resolver_response_messages ||
+	    outnet->dtenv->log_forwarder_response_messages))
+		dt_msg_send_outside_response(outnet->dtenv, &sq->addr, c->type,
+		sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen,
+		&sq->last_sent_time, sq->outnet->now_tv, c->buffer);
+#endif
+	if(!fallback_tcp) {
+	    if( (sq->status == serviced_query_UDP_EDNS
+		||sq->status == serviced_query_UDP_EDNS_FRAG)
+		&& (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
+			== LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(
+			sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOTIMPL)) {
+		/* try to get an answer by falling back without EDNS */
+		verbose(VERB_ALGO, "serviced query: attempt without EDNS");
+		sq->status = serviced_query_UDP_EDNS_fallback;
+		sq->retry = 0;
+		if(!serviced_udp_send(sq, c->buffer)) {
+			serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
+		}
+		return 0;
+	    } else if(sq->status == serviced_query_PROBE_EDNS) {
+		/* probe without EDNS succeeds, so we conclude that this
+		 * host likely has EDNS packets dropped */
+		log_addr(VERB_DETAIL, "timeouts, concluded that connection to "
+			"host drops EDNS packets", &sq->addr, sq->addrlen);
+		/* only store noEDNS in cache if domain is noDNSSEC */
+		if(!sq->want_dnssec)
+		  if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
+			sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) {
+			log_err("Out of memory caching no edns for host");
+		  }
+		sq->status = serviced_query_UDP;
+	    } else if(sq->status == serviced_query_UDP_EDNS &&
+		!sq->edns_lame_known) {
+		/* now we know that edns queries received answers store that */
+		log_addr(VERB_ALGO, "serviced query: EDNS works for",
+			&sq->addr, sq->addrlen);
+		if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
+			sq->zone, sq->zonelen, 0, (time_t)now.tv_sec)) {
+			log_err("Out of memory caching edns works");
+		}
+		sq->edns_lame_known = 1;
+	    } else if(sq->status == serviced_query_UDP_EDNS_fallback &&
+		!sq->edns_lame_known && (LDNS_RCODE_WIRE(
+		sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOERROR ||
+		LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) ==
+		LDNS_RCODE_NXDOMAIN || LDNS_RCODE_WIRE(sldns_buffer_begin(
+		c->buffer)) == LDNS_RCODE_YXDOMAIN)) {
+		/* the fallback produced a result that looks promising, note
+		 * that this server should be approached without EDNS */
+		/* only store noEDNS in cache if domain is noDNSSEC */
+		if(!sq->want_dnssec) {
+		  log_addr(VERB_ALGO, "serviced query: EDNS fails for",
+			&sq->addr, sq->addrlen);
+		  if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
+			sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) {
+			log_err("Out of memory caching no edns for host");
+		  }
+		} else {
+		  log_addr(VERB_ALGO, "serviced query: EDNS fails, but "
+			"not stored because need DNSSEC for", &sq->addr,
+			sq->addrlen);
+		}
+		sq->status = serviced_query_UDP;
+	    }
+	    if(now.tv_sec > sq->last_sent_time.tv_sec ||
+		(now.tv_sec == sq->last_sent_time.tv_sec &&
+		now.tv_usec > sq->last_sent_time.tv_usec)) {
+		/* convert from microseconds to milliseconds */
+		int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000
+		  + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000;
+		verbose(VERB_ALGO, "measured roundtrip at %d msec", roundtime);
+		log_assert(roundtime >= 0);
+		/* in case the system hibernated, do not enter a huge value,
+		 * above this value gives trouble with server selection */
+		if(roundtime < 60000) {
+		    if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
+			sq->zone, sq->zonelen, sq->qtype, roundtime,
+			sq->last_rtt, (time_t)now.tv_sec))
+			log_err("out of memory noting rtt.");
+		}
+	    }
+	} /* end of if_!fallback_tcp */
+	/* perform TC flag check and TCP fallback after updating our
+	 * cache entries for EDNS status and RTT times.
+	 * fallback_tcp is tested first: in that case there is no reply and
+	 * the TC bit in the buffer has no meaning. */
+	if(fallback_tcp || LDNS_TC_WIRE(sldns_buffer_begin(c->buffer))) {
+		/* fallback to TCP */
+		/* this discards partial UDP contents */
+		if(sq->status == serviced_query_UDP_EDNS ||
+			sq->status == serviced_query_UDP_EDNS_FRAG ||
+			sq->status == serviced_query_UDP_EDNS_fallback)
+			/* if we have unfinished EDNS_fallback, start again */
+			sq->status = serviced_query_TCP_EDNS;
+		else	sq->status = serviced_query_TCP;
+		serviced_tcp_initiate(sq, c->buffer);
+		return 0;
+	}
+	/* yay! an answer */
+	serviced_callbacks(sq, error, c, rep);
+	return 0;
+}
+
+/**
+ * Register a callback for a serviced query, creating and sending the query
+ * if no identical one (same query bytes, dnssec value and address) exists.
+ * A new query is sent over UDP when do_udp is on and neither tcp_upstream
+ * nor ssl_upstream is set, and over TCP otherwise.
+ * The callback is added at the front of the callback list of the query;
+ * duplicates are kept on purpose, since every registration by the caller
+ * has to be removed separately.
+ * @param buff: scratch buffer, overwritten with the query.
+ * @return the serviced query, or NULL on allocation or send failure.
+ */
+struct serviced_query*
+outnet_serviced_query(struct outside_network* outnet,
+	uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
+	uint16_t flags, int dnssec, int want_dnssec, int nocaps,
+	int tcp_upstream, int ssl_upstream, struct sockaddr_storage* addr,
+	socklen_t addrlen, uint8_t* zone, size_t zonelen,
+	comm_point_callback_t* callback, void* callback_arg,
+	sldns_buffer* buff)
+{
+	struct serviced_query* sq;
+	struct service_callback* cb;
+	serviced_gen_query(buff, qname, qnamelen, qtype, qclass, flags);
+	sq = lookup_serviced(outnet, buff, dnssec, addr, addrlen);
+	cb = (struct service_callback*)malloc(sizeof(*cb));
+	if(!cb)
+		return NULL;
+	if(!sq) {
+		int sent;
+		/* no identical query yet, make a new entry */
+		sq = serviced_create(outnet, buff, dnssec, want_dnssec, nocaps,
+			tcp_upstream, ssl_upstream, addr, addrlen, zone,
+			zonelen, (int)qtype);
+		if(!sq) {
+			free(cb);
+			return NULL;
+		}
+		/* perform first network action */
+		if(outnet->do_udp && !(tcp_upstream || ssl_upstream))
+			sent = serviced_udp_send(sq, buff);
+		else
+			sent = serviced_tcp_send(sq, buff);
+		if(!sent) {
+			/* undo the creation of the entry */
+			(void)rbtree_delete(outnet->serviced, sq);
+			free(sq->qbuf);
+			free(sq->zone);
+			free(sq);
+			free(cb);
+			return NULL;
+		}
+	}
+	/* push the callback on the front of the list */
+	cb->cb = callback;
+	cb->cb_arg = callback_arg;
+	cb->next = sq->cblist;
+	sq->cblist = cb;
+	return sq;
+}
+
+/** unlink and free the first callback in the list registered with cb_arg */
+static void
+callback_list_remove(struct serviced_query* sq, void* cb_arg)
+{
+	struct service_callback* prev = NULL;
+	struct service_callback* cur;
+	for(cur = sq->cblist; cur != NULL; prev = cur, cur = cur->next) {
+		if(cur->cb_arg != cb_arg)
+			continue;
+		/* found it: bypass the element and release it */
+		if(prev != NULL)
+			prev->next = cur->next;
+		else
+			sq->cblist = cur->next;
+		free(cur);
+		return;
+	}
+}
+
+/** drop one callback registration; delete the query if none are left */
+void outnet_serviced_query_stop(struct serviced_query* sq, void* cb_arg)
+{
+	if(sq == NULL)
+		return;
+	callback_list_remove(sq, cb_arg);
+	/* keep the query while others are still registered; and if the
+	 * callbacks() routine scheduled deletion, let it do that */
+	if(sq->cblist != NULL || sq->to_be_deleted)
+		return;
+	{
+#ifdef UNBOUND_DEBUG
+		rbnode_t* rem =
+#else
+		(void)
+#endif
+		rbtree_delete(sq->outnet->serviced, sq);
+		log_assert(rem); /* should be present */
+		serviced_delete(sq);
+	}
+}
+
+/** memory in use by a waiting tcp entry (in use or not); 0 for NULL */
+static size_t
+waiting_tcp_get_mem(struct waiting_tcp* w)
+{
+	if(w == NULL)
+		return 0;
+	/* the timer can be absent, then only structure and packet count */
+	if(w->timer == NULL)
+		return sizeof(*w) + w->pkt_len;
+	return sizeof(*w) + w->pkt_len + comm_timer_get_mem(w->timer);
+}
+
+/** memory in use by an outgoing interface and its in-use commpoints */
+static size_t
+if_get_mem(struct port_if* pif)
+{
+	size_t total = sizeof(*pif);
+	int idx = 0;
+	/* the available-ports array and the out pointer array */
+	total += sizeof(int)*pif->avail_total;
+	total += sizeof(struct port_comm*)*pif->maxout;
+	/* only the first inuse entries of the out array are filled */
+	while(idx < pif->inuse) {
+		total += sizeof(*pif->out[idx]);
+		total += comm_point_get_mem(pif->out[idx]->cp);
+		idx++;
+	}
+	return total;
+}
+
+/** memory in use by a waiting udp query: structure, timer and packet */
+static size_t
+waiting_udp_get_mem(struct pending* w)
+{
+	return sizeof(*w) + comm_timer_get_mem(w->timer) + w->pkt_len;
+}
+
+/** sum the memory in use by the outside network and what hangs off it */
+size_t outnet_get_mem(struct outside_network* outnet)
+{
+	size_t total;
+	size_t t;
+	int n;
+	struct port_comm* fd;
+	struct pending* udpq;
+	struct waiting_tcp* tcpq;
+	struct serviced_query* sq;
+	struct service_callback* cb;
+
+	/* the structure, the event base and the shared udp buffer;
+	 * the second buffer is not ours */
+	total = sizeof(*outnet) + sizeof(*outnet->base) +
+		sizeof(*outnet->udp_buff) +
+		sldns_buffer_capacity(outnet->udp_buff);
+
+	/* unused port commpoints */
+	fd = outnet->unused_fds;
+	while(fd != NULL) {
+		total += sizeof(*fd) + comm_point_get_mem(fd->cp);
+		fd = fd->next;
+	}
+
+	/* outgoing interfaces, IP4 then IP6 */
+	for(n = 0; n < outnet->num_ip4; n++)
+		total += if_get_mem(&outnet->ip4_ifs[n]);
+	for(n = 0; n < outnet->num_ip6; n++)
+		total += if_get_mem(&outnet->ip6_ifs[n]);
+
+	/* udp queries waiting for a file descriptor */
+	udpq = outnet->udp_wait_first;
+	while(udpq != NULL) {
+		total += waiting_udp_get_mem(udpq);
+		udpq = udpq->next_waiting;
+	}
+
+	/* the tcp pointer array, every tcp slot, and the query on it */
+	total += sizeof(struct pending_tcp*)*outnet->num_tcp;
+	for(t = 0; t < outnet->num_tcp; t++) {
+		struct pending_tcp* conn = outnet->tcp_conns[t];
+		total += sizeof(struct pending_tcp);
+		total += comm_point_get_mem(conn->c);
+		if(conn->query)
+			total += waiting_tcp_get_mem(conn->query);
+	}
+
+	/* tcp queries waiting for a free slot */
+	tcpq = outnet->tcp_wait_first;
+	while(tcpq != NULL) {
+		total += waiting_tcp_get_mem(tcpq);
+		tcpq = tcpq->next_waiting;
+	}
+
+	/* the pending tree, estimated per element */
+	total += sizeof(*outnet->pending);
+	total += (sizeof(struct pending) + comm_timer_get_mem(NULL)) *
+		outnet->pending->count;
+
+	/* the serviced tree with query buffers and callback lists */
+	total += sizeof(*outnet->serviced);
+	total += outnet->svcd_overhead;
+	RBTREE_FOR(sq, struct serviced_query*, outnet->serviced) {
+		total += sizeof(*sq) + sq->qbuflen;
+		cb = sq->cblist;
+		while(cb != NULL) {
+			total += sizeof(*cb);
+			cb = cb->next;
+		}
+	}
+	return total;
+}
+
+/** memory in use by one serviced query, its callbacks and udp pending */
+size_t
+serviced_get_mem(struct serviced_query* sq)
+{
+	size_t total = sizeof(*sq) + sq->qbuflen;
+	struct service_callback* cb = sq->cblist;
+	while(cb != NULL) {
+		total += sizeof(*cb);
+		cb = cb->next;
+	}
+	switch(sq->status) {
+	case serviced_query_UDP_EDNS:
+	case serviced_query_UDP:
+	case serviced_query_PROBE_EDNS:
+	case serviced_query_UDP_EDNS_FRAG:
+	case serviced_query_UDP_EDNS_fallback:
+		/* a udp query is outstanding: pending entry and its timer */
+		total += sizeof(struct pending);
+		total += comm_timer_get_mem(NULL);
+		break;
+	default:
+		/* does not have size of the pkt pointer;
+		 * always has a timer except on malloc failures.
+		 * The waiting_tcp structure and its timer are not added
+		 * here, these sizes are part of the main outside network
+		 * mem. */
+		break;
+	}
+	return total;
+}
+
diff --git a/external/unbound/services/outside_network.h b/external/unbound/services/outside_network.h
new file mode 100644
index 000000000..9959676d3
--- /dev/null
+++ b/external/unbound/services/outside_network.h
@@ -0,0 +1,554 @@
+/*
+ * services/outside_network.h - listen to answers from the network
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file has functions to send queries to authoritative servers,
+ * and wait for the pending answer, with timeouts.
+ */
+
+#ifndef OUTSIDE_NETWORK_H
+#define OUTSIDE_NETWORK_H
+
+#include "util/rbtree.h"
+#include "util/netevent.h"
+#include "dnstap/dnstap_config.h"
+struct pending;
+struct pending_timeout;
+struct ub_randstate;
+struct pending_tcp;
+struct waiting_tcp;
+struct waiting_udp;
+struct infra_cache;
+struct port_comm;
+struct port_if;
+struct sldns_buffer;
+struct serviced_query;
+struct dt_env;
+
+/**
+ * Send queries to outside servers and wait for answers from servers.
+ * Contains answer-listen sockets.
+ */
+struct outside_network {
+ /** Base for select calls */
+ struct comm_base* base;
+ /** pointer to time in seconds */
+ time_t* now_secs;
+ /** pointer to time in microseconds */
+ struct timeval* now_tv;
+
+ /** buffer shared by UDP connections, since there is only one
+ datagram at any time. */
+ struct sldns_buffer* udp_buff;
+ /** serviced_callbacks malloc overhead when processing multiple
+ * identical serviced queries to the same server.
+ * Included in the total that outnet_get_mem() returns. */
+ size_t svcd_overhead;
+ /** use x20 bits to encode additional ID random bits */
+ int use_caps_for_id;
+ /** outside network wants to quit. Stop queued msgs from being sent. */
+ int want_to_quit;
+
+ /** number of unwanted replies received (for statistics) */
+ size_t unwanted_replies;
+ /** cumulative total of unwanted replies (for defense) */
+ size_t unwanted_total;
+ /** threshold when to take defensive action. If 0 then never. */
+ size_t unwanted_threshold;
+ /** what action to take, called when defensive action is needed */
+ void (*unwanted_action)(void*);
+ /** user param for action */
+ void* unwanted_param;
+
+ /** linked list of available commpoints, unused file descriptors,
+ * for use as outgoing UDP ports. cp.fd=-1 in them. */
+ struct port_comm* unused_fds;
+ /** if udp is done. If false, the first network action of a new
+ * serviced query is a TCP send. */
+ int do_udp;
+ /** if udp is delay-closed (delayed answers do not meet closed port)*/
+ int delayclose;
+ /** timeout for delayclose */
+ struct timeval delay_tv;
+
+ /** array of outgoing IP4 interfaces */
+ struct port_if* ip4_ifs;
+ /** number of outgoing IP4 interfaces */
+ int num_ip4;
+
+ /** array of outgoing IP6 interfaces */
+ struct port_if* ip6_ifs;
+ /** number of outgoing IP6 interfaces */
+ int num_ip6;
+
+ /** pending udp queries waiting to be sent out, waiting for fd.
+ * Linked through the next_waiting member. */
+ struct pending* udp_wait_first;
+ /** last pending udp query in list */
+ struct pending* udp_wait_last;
+
+ /** pending udp answers. sorted by id, addr */
+ rbtree_t* pending;
+ /** serviced queries, sorted by qbuf, addr, dnssec */
+ rbtree_t* serviced;
+ /** host cache, pointer but not owned by outnet. */
+ struct infra_cache* infra;
+ /** where to get random numbers */
+ struct ub_randstate* rnd;
+ /** ssl context to create ssl wrapped TCP with DNS connections */
+ void* sslctx;
+#ifdef USE_DNSTAP
+ /** dnstap environment */
+ struct dt_env* dtenv;
+#endif
+
+ /**
+ * Array of tcp pending used for outgoing TCP connections.
+ * Each can be used to establish a TCP connection with a server.
+ * The file descriptors are -1 if they are free, and need to be
+ * opened for the tcp connection. Can be used for ip4 and ip6.
+ */
+ struct pending_tcp **tcp_conns;
+ /** number of tcp communication points (entries in tcp_conns). */
+ size_t num_tcp;
+ /** number of tcp communication points in use. */
+ size_t num_tcp_outgoing;
+ /** list of tcp comm points that are free for use */
+ struct pending_tcp* tcp_free;
+ /** list of tcp queries waiting for a buffer.
+ * Linked through the next_waiting member. */
+ struct waiting_tcp* tcp_wait_first;
+ /** last of waiting query list */
+ struct waiting_tcp* tcp_wait_last;
+};
+
+/**
+ * Outgoing interface. Ports available and currently used are tracked
+ * per interface.
+ */
+struct port_if {
+ /** address ready to allocate new socket (except port no). */
+ struct sockaddr_storage addr;
+ /** length of addr field */
+ socklen_t addrlen;
+
+ /** the available ports array. These are unused.
+ * Only the first (avail_total - inuse) entries are filled. */
+ int* avail_ports;
+ /** the total number of available ports (size of the array) */
+ int avail_total;
+
+ /** array of the commpoints currently in use.
+ * allocated for max number of fds, first part in use. */
+ struct port_comm** out;
+ /** max number of fds, size of out array */
+ int maxout;
+ /** number of commpoints (and thus also ports) in use;
+ * out[0] up to out[inuse-1] are the filled entries. */
+ int inuse;
+};
+
+/**
+ * Outgoing commpoint for UDP port.
+ */
+struct port_comm {
+ /** next in free list (the unused_fds list of the outside_network) */
+ struct port_comm* next;
+ /** which port number (when in use) */
+ int number;
+ /** interface it is used in */
+ struct port_if* pif;
+ /** index in the out array of the interface */
+ int index;
+ /** number of outstanding queries on this port */
+ int num_outstanding;
+ /** UDP commpoint, fd=-1 if not in use */
+ struct comm_point* cp;
+};
+
+/**
+ * A query that has an answer pending for it.
+ */
+struct pending {
+ /** redblacktree entry, key is the pending struct(id, addr). */
+ rbnode_t node;
+ /** the ID for the query. unsigned int so that a value out of range
+ * can be used to signify a pending that is for certain not present
+ * in the rbtree. (and for which deletion is safe). */
+ unsigned int id;
+ /** remote address. */
+ struct sockaddr_storage addr;
+ /** length of addr field in use. */
+ socklen_t addrlen;
+ /** comm point it was sent on (and reply must come back on). */
+ struct port_comm* pc;
+ /** timeout event */
+ struct comm_timer* timer;
+ /** callback for the timeout, error or reply to the message */
+ comm_point_callback_t* cb;
+ /** callback user argument */
+ void* cb_arg;
+ /** the outside network it is part of */
+ struct outside_network* outnet;
+ /** the corresponding serviced_query */
+ struct serviced_query* sq;
+
+ /*---- filled if udp pending is waiting -----*/
+ /** next in waiting list (starts at udp_wait_first of the outside
+ * network). */
+ struct pending* next_waiting;
+ /** timeout in msec */
+ int timeout;
+ /** The query itself, the query packet to send. */
+ uint8_t* pkt;
+ /** length of query packet, in octets. */
+ size_t pkt_len;
+};
+
+/**
+ * Pending TCP query to server.
+ * One element of the tcp_conns array of the outside network.
+ */
+struct pending_tcp {
+ /** next in list of free tcp comm points, or NULL. */
+ struct pending_tcp* next_free;
+ /** the ID for the query; checked in reply */
+ uint16_t id;
+ /** tcp comm point it was sent on (and reply must come back on). */
+ struct comm_point* c;
+ /** the query being serviced, NULL if the pending_tcp is unused. */
+ struct waiting_tcp* query;
+};
+
+/**
+ * Query waiting for TCP buffer.
+ */
+struct waiting_tcp {
+ /**
+ * next in waiting list.
+ * if pkt==NULL, this points to the pending_tcp structure.
+ */
+ struct waiting_tcp* next_waiting;
+ /** timeout event; timer keeps running whether the query is
+ * waiting for a buffer or the tcp reply is pending */
+ struct comm_timer* timer;
+ /** the outside network it is part of */
+ struct outside_network* outnet;
+ /** remote address. */
+ struct sockaddr_storage addr;
+ /** length of addr field in use. */
+ socklen_t addrlen;
+ /**
+ * The query itself, the query packet to send.
+ * allocated after the waiting_tcp structure.
+ * set to NULL when the query is serviced and is part of a pending_tcp.
+ * if this is NULL, the next_waiting points to the pending_tcp.
+ */
+ uint8_t* pkt;
+ /** length of query packet, in octets. */
+ size_t pkt_len;
+ /** callback for the timeout, error or reply to the message */
+ comm_point_callback_t* cb;
+ /** callback user argument */
+ void* cb_arg;
+ /** if it uses ssl upstream */
+ int ssl_upstream;
+};
+
+/**
+ * Callback to party interested in serviced query results.
+ * Element of the singly linked cblist of a serviced_query.
+ */
+struct service_callback {
+ /** next in callback list */
+ struct service_callback* next;
+ /** callback function */
+ comm_point_callback_t* cb;
+ /** user argument for callback function; also the value that
+ * outnet_serviced_query_stop() matches on to remove this entry. */
+ void* cb_arg;
+};
+
+/** fallback size for fragmentation for EDNS in IPv4.
+ * NOTE(review): presumably the size used in the
+ * serviced_query_UDP_EDNS_FRAG state - confirm in outside_network.c */
+#define EDNS_FRAG_SIZE_IP4 1472
+/** fallback size for EDNS in IPv6, fits one fragment with ip6-tunnel-ids */
+#define EDNS_FRAG_SIZE_IP6 1232
+
+/**
+ * Query service record.
+ * Contains query and destination. UDP, TCP, EDNS are all tried.
+ * complete with retries and timeouts. A number of interested parties can
+ * receive a callback.
+ */
+struct serviced_query {
+ /** The rbtree node, key is this record */
+ rbnode_t node;
+ /** The query that needs to be answered. Starts with flags u16,
+ * then qdcount, ..., including qname, qtype, qclass. Does not include
+ * EDNS record. */
+ uint8_t* qbuf;
+ /** length of qbuf. */
+ size_t qbuflen;
+ /** If an EDNS section is included, the DO/CD bit will be turned on. */
+ int dnssec;
+ /** We want signatures, or else the answer is likely useless */
+ int want_dnssec;
+ /** ignore capsforid */
+ int nocaps;
+ /** tcp upstream used, use tcp, or ssl_upstream for SSL */
+ int tcp_upstream, ssl_upstream;
+ /** where to send it */
+ struct sockaddr_storage addr;
+ /** length of addr field in use. */
+ socklen_t addrlen;
+ /** zone name, uncompressed domain name in wireformat */
+ uint8_t* zone;
+ /** length of zone name */
+ size_t zonelen;
+ /** qtype */
+ int qtype;
+ /** current status */
+ enum serviced_query_status {
+ /** initial status */
+ serviced_initial,
+ /** UDP with EDNS sent */
+ serviced_query_UDP_EDNS,
+ /** UDP without EDNS sent */
+ serviced_query_UDP,
+ /** TCP with EDNS sent */
+ serviced_query_TCP_EDNS,
+ /** TCP without EDNS sent */
+ serviced_query_TCP,
+ /** probe to test EDNS lameness (EDNS is dropped) */
+ serviced_query_PROBE_EDNS,
+ /** probe to test noEDNS0 (EDNS gives FORMERRorNOTIMP) */
+ serviced_query_UDP_EDNS_fallback,
+ /** probe to test TCP noEDNS0 (EDNS gives FORMERRorNOTIMP) */
+ serviced_query_TCP_EDNS_fallback,
+ /** send UDP query with EDNS1480 (or 1280).
+ * NOTE(review): these numbers differ from EDNS_FRAG_SIZE_IP4
+ * (1472) and EDNS_FRAG_SIZE_IP6 (1232); presumably the macros
+ * are what is sent - confirm in outside_network.c */
+ serviced_query_UDP_EDNS_FRAG
+ }
+ /** variable with current status */
+ status;
+ /** true if serviced_query is scheduled for deletion already;
+ * outnet_serviced_query_stop() then leaves the deletion to the
+ * routine that scheduled it. */
+ int to_be_deleted;
+ /** number of UDP retries */
+ int retry;
+ /** time last UDP was sent */
+ struct timeval last_sent_time;
+ /** rtt of last (UDP) message */
+ int last_rtt;
+ /** do we know edns probe status already, for UDP_EDNS queries */
+ int edns_lame_known;
+ /** outside network this is part of */
+ struct outside_network* outnet;
+ /** list of interested parties that need callback on results.
+ * New registrations are put at the front of the list. */
+ struct service_callback* cblist;
+ /** the UDP or TCP query that is pending, see status which */
+ void* pending;
+};
+
+/**
+ * Create outside_network structure with N udp ports.
+ * @param base: the communication base to use for event handling.
+ * @param bufsize: size for network buffers.
+ * @param num_ports: number of udp ports to open per interface.
+ * @param ifs: interface names (or NULL for default interface).
+ * These interfaces must be able to access all authoritative servers.
+ * @param num_ifs: number of names in array ifs.
+ * @param do_ip4: service IP4.
+ * @param do_ip6: service IP6.
+ * @param num_tcp: number of outgoing tcp buffers to preallocate.
+ * @param infra: pointer to infra cache used for serviced queries.
+ * @param rnd: stored to create random numbers for serviced queries.
+ * @param use_caps_for_id: enable to use 0x20 bits to encode id randomness.
+ * @param availports: array of available ports.
+ * @param numavailports: number of available ports in array.
+ * @param unwanted_threshold: when to take defensive action.
+ * @param unwanted_action: the action to take.
+ * @param unwanted_param: user parameter to action.
+ * @param do_udp: if udp is done.
+ * @param sslctx: context to create outgoing connections with (if enabled).
+ * @param delayclose: if not 0, udp sockets are delayed before timeout closure.
+ * msec to wait on timed out udp sockets.
+ * @param dtenv: environment to send dnstap events with (if enabled).
+ * @return: the new structure (with no pending answers) or NULL on error.
+ */
+struct outside_network* outside_network_create(struct comm_base* base,
+ size_t bufsize, size_t num_ports, char** ifs, int num_ifs,
+ int do_ip4, int do_ip6, size_t num_tcp, struct infra_cache* infra,
+ struct ub_randstate* rnd, int use_caps_for_id, int* availports,
+ int numavailports, size_t unwanted_threshold,
+ void (*unwanted_action)(void*), void* unwanted_param, int do_udp,
+ void* sslctx, int delayclose, struct dt_env *dtenv);
+
+/**
+ * Delete outside_network structure.
+ * @param outnet: object to delete.
+ */
+void outside_network_delete(struct outside_network* outnet);
+
+/**
+ * Prepare for quit. Sends no more queries, even if queued up.
+ * NOTE(review): presumably this sets the want_to_quit member - confirm
+ * in outside_network.c.
+ * @param outnet: object to prepare for removal
+ */
+void outside_network_quit_prepare(struct outside_network* outnet);
+
+/**
+ * Send UDP query, create pending answer.
+ * Changes the ID for the query to be random and unique for that destination.
+ * @param sq: serviced query.
+ * @param packet: wireformat query to send to destination.
+ * @param timeout: in milliseconds from now.
+ * @param callback: function to call on error, timeout or reply.
+ * @param callback_arg: user argument for callback function.
+ * @return: NULL on error for malloc or socket. Else the pending query object.
+ */
+struct pending* pending_udp_query(struct serviced_query* sq,
+ struct sldns_buffer* packet, int timeout, comm_point_callback_t* callback,
+ void* callback_arg);
+
+/**
+ * Send TCP query. May wait for TCP buffer. Selects ID to be random, and
+ * checks id.
+ * @param sq: serviced query.
+ * @param packet: wireformat query to send to destination. copied from.
+ * @param timeout: in seconds from now.
+ * Timer starts running now. Timer may expire if all buffers are used,
+ * without any query having been sent to the server yet.
+ * @param callback: function to call on error, timeout or reply.
+ * @param callback_arg: user argument for callback function.
+ * @return: NULL on error for malloc or socket. Else the pending TCP object.
+ */
+struct waiting_tcp* pending_tcp_query(struct serviced_query* sq,
+ struct sldns_buffer* packet, int timeout, comm_point_callback_t* callback,
+ void* callback_arg);
+
+/**
+ * Delete pending answer.
+ * @param outnet: outside network the pending query is part of.
+ * Internal feature: if outnet is NULL, p is not unlinked from rbtree.
+ * @param p: the pending query that is deleted.
+ */
+void pending_delete(struct outside_network* outnet, struct pending* p);
+
+/**
+ * Perform a serviced query to the authoritative servers.
+ * Duplicate efforts are detected, and EDNS, TCP and UDP retry is performed.
+ * @param outnet: outside network, with rbtree of serviced queries.
+ * @param qname: what qname to query.
+ * @param qnamelen: length of qname in octets including 0 root label.
+ * @param qtype: rrset type to query (host format)
+ * @param qclass: query class. (host format)
+ * @param flags: flags u16 (host format), includes opcode, CD bit.
+ * @param dnssec: if set, DO bit is set in EDNS queries.
+ * If the value includes BIT_CD, CD bit is set when in EDNS queries.
+ * If the value includes BIT_DO, DO bit is set when in EDNS queries.
+ * @param want_dnssec: signatures are needed, without EDNS the answer is
+ * likely to be useless.
+ * @param nocaps: ignore use_caps_for_id and use unperturbed qname.
+ * @param tcp_upstream: use TCP for upstream queries.
+ * @param ssl_upstream: use SSL for upstream queries.
+ * @param callback: callback function.
+ * @param callback_arg: user argument to callback function.
+ * @param addr: to which server to send the query.
+ * @param addrlen: length of addr.
+ * @param zone: name of the zone of the delegation point. wireformat dname.
+ This is the delegation point name for which the server is deemed
+ authoritative.
+ * @param zonelen: length of zone.
+ * @param buff: scratch buffer to create query contents in. Empty on exit.
+ * @return NULL on error, or pointer to the serviced query that is used to
+ * answer. This serviced query may be shared with other callbacks as
+ * well.
+ */
+struct serviced_query* outnet_serviced_query(struct outside_network* outnet,
+ uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
+ uint16_t flags, int dnssec, int want_dnssec, int nocaps,
+ int tcp_upstream, int ssl_upstream, struct sockaddr_storage* addr,
+ socklen_t addrlen, uint8_t* zone, size_t zonelen,
+ comm_point_callback_t* callback, void* callback_arg,
+ struct sldns_buffer* buff);
+
+/**
+ * Remove service query callback.
+ * If that leads to zero callbacks, the query is completely cancelled,
+ * unless it was already scheduled for deletion (to_be_deleted).
+ * @param sq: serviced query to adjust. If NULL, nothing is done.
+ * @param cb_arg: callback argument of callback that needs removal.
+ * same as the callback_arg to outnet_serviced_query().
+ * Only the first list entry with this argument is removed.
+ */
+void outnet_serviced_query_stop(struct serviced_query* sq, void* cb_arg);
+
+/**
+ * Get memory size in use by outside network.
+ * Counts buffers and outstanding query (serviced queries) malloced data.
+ * The size of entries in the pending tree is estimated per entry.
+ * @param outnet: outside network structure.
+ * @return size in bytes.
+ */
+size_t outnet_get_mem(struct outside_network* outnet);
+
+/**
+ * Get memory size in use by serviced query while it is servicing callbacks.
+ * This takes into account the pre-deleted status of it; it will be deleted
+ * when the callbacks are done.
+ * For the UDP states the size of a pending entry and its timer is added;
+ * for the other states nothing extra is added.
+ * @param sq: serviced query.
+ * @return size in bytes.
+ */
+size_t serviced_get_mem(struct serviced_query* sq);
+
+/** callback for incoming udp answers from the network */
+int outnet_udp_cb(struct comm_point* c, void* arg, int error,
+ struct comm_reply *reply_info);
+
+/** callback for pending tcp connections */
+int outnet_tcp_cb(struct comm_point* c, void* arg, int error,
+ struct comm_reply *reply_info);
+
+/** callback for udp timeout */
+void pending_udp_timer_cb(void *arg);
+
+/** callback for the delay timer after a udp timeout (see delayclose) */
+void pending_udp_timer_delay_cb(void *arg);
+
+/** callback for outgoing TCP timer event */
+void outnet_tcptimer(void* arg);
+
+/** callback for serviced query UDP answers */
+int serviced_udp_callback(struct comm_point* c, void* arg, int error,
+ struct comm_reply* rep);
+
+/** TCP reply or error callback for serviced queries */
+int serviced_tcp_callback(struct comm_point* c, void* arg, int error,
+ struct comm_reply* rep);
+
+/** compare function of pending rbtree (sorted by id, addr) */
+int pending_cmp(const void* key1, const void* key2);
+
+/** compare function of serviced query rbtree (sorted by qbuf, addr,
+ * dnssec) */
+int serviced_cmp(const void* key1, const void* key2);
+
+#endif /* OUTSIDE_NETWORK_H */