diff options
Diffstat (limited to '')
-rw-r--r-- | external/unbound/services/cache/dns.c | 816 | ||||
-rw-r--r-- | external/unbound/services/cache/dns.h | 194 | ||||
-rw-r--r-- | external/unbound/services/cache/infra.c | 569 | ||||
-rw-r--r-- | external/unbound/services/cache/infra.h | 309 | ||||
-rw-r--r-- | external/unbound/services/cache/rrset.c | 417 | ||||
-rw-r--r-- | external/unbound/services/cache/rrset.h | 231 | ||||
-rw-r--r-- | external/unbound/services/listen_dnsport.c | 1062 | ||||
-rw-r--r-- | external/unbound/services/listen_dnsport.h | 210 | ||||
-rw-r--r-- | external/unbound/services/localzone.c | 1400 | ||||
-rw-r--r-- | external/unbound/services/localzone.h | 317 | ||||
-rw-r--r-- | external/unbound/services/mesh.c | 1209 | ||||
-rw-r--r-- | external/unbound/services/mesh.h | 572 | ||||
-rw-r--r-- | external/unbound/services/modstack.c | 215 | ||||
-rw-r--r-- | external/unbound/services/modstack.h | 113 | ||||
-rw-r--r-- | external/unbound/services/outbound_list.c | 89 | ||||
-rw-r--r-- | external/unbound/services/outbound_list.h | 105 | ||||
-rw-r--r-- | external/unbound/services/outside_network.c | 2052 | ||||
-rw-r--r-- | external/unbound/services/outside_network.h | 554 |
18 files changed, 10434 insertions, 0 deletions
diff --git a/external/unbound/services/cache/dns.c b/external/unbound/services/cache/dns.c new file mode 100644 index 000000000..c663b8e8b --- /dev/null +++ b/external/unbound/services/cache/dns.c @@ -0,0 +1,816 @@ +/* + * services/cache/dns.c - Cache services for DNS using msg and rrset caches. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains the DNS cache. 
+ */ +#include "config.h" +#include "iterator/iter_delegpt.h" +#include "validator/val_nsec.h" +#include "services/cache/dns.h" +#include "services/cache/rrset.h" +#include "util/data/msgreply.h" +#include "util/data/packed_rrset.h" +#include "util/data/dname.h" +#include "util/module.h" +#include "util/net_help.h" +#include "util/regional.h" +#include "util/config_file.h" +#include "ldns/sbuffer.h" + +/** store rrsets in the rrset cache. + * @param env: module environment with caches. + * @param rep: contains list of rrsets to store. + * @param now: current time. + * @param leeway: during prefetch how much leeway to update TTLs. + * This makes rrsets (other than type NS) timeout sooner so they get + * updated with a new full TTL. + * Type NS does not get this, because it must not be refreshed from the + * child domain, but keep counting down properly. + * @param pside: if from parentside discovered NS, so that its NS is okay + * in a prefetch situation to be updated (without becoming sticky). + * @param qrep: update rrsets here if cache is better + * @param region: for qrep allocs. + */ +static void +store_rrsets(struct module_env* env, struct reply_info* rep, time_t now, + time_t leeway, int pside, struct reply_info* qrep, + struct regional* region) +{ + size_t i; + /* see if rrset already exists in cache, if not insert it. 
*/ + for(i=0; i<rep->rrset_count; i++) { + rep->ref[i].key = rep->rrsets[i]; + rep->ref[i].id = rep->rrsets[i]->id; + /* update ref if it was in the cache */ + switch(rrset_cache_update(env->rrset_cache, &rep->ref[i], + env->alloc, now + ((ntohs(rep->ref[i].key->rk.type)== + LDNS_RR_TYPE_NS && !pside)?0:leeway))) { + case 0: /* ref unchanged, item inserted */ + break; + case 2: /* ref updated, cache is superior */ + if(region) { + struct ub_packed_rrset_key* ck; + lock_rw_rdlock(&rep->ref[i].key->entry.lock); + /* if deleted rrset, do not copy it */ + if(rep->ref[i].key->id == 0) + ck = NULL; + else ck = packed_rrset_copy_region( + rep->ref[i].key, region, now); + lock_rw_unlock(&rep->ref[i].key->entry.lock); + if(ck) { + /* use cached copy if memory allows */ + qrep->rrsets[i] = ck; + } + } + /* no break: also copy key item */ + case 1: /* ref updated, item inserted */ + rep->rrsets[i] = rep->ref[i].key; + } + } +} + +void +dns_cache_store_msg(struct module_env* env, struct query_info* qinfo, + hashvalue_t hash, struct reply_info* rep, time_t leeway, int pside, + struct reply_info* qrep, struct regional* region) +{ + struct msgreply_entry* e; + time_t ttl = rep->ttl; + size_t i; + + /* store RRsets */ + for(i=0; i<rep->rrset_count; i++) { + rep->ref[i].key = rep->rrsets[i]; + rep->ref[i].id = rep->rrsets[i]->id; + } + + /* there was a reply_info_sortref(rep) here but it seems to be + * unnecessary, because the cache gets locked per rrset. 
*/ + reply_info_set_ttls(rep, *env->now); + store_rrsets(env, rep, *env->now, leeway, pside, qrep, region); + if(ttl == 0) { + /* we do not store the message, but we did store the RRs, + * which could be useful for delegation information */ + verbose(VERB_ALGO, "TTL 0: dropped msg from cache"); + free(rep); + return; + } + + /* store msg in the cache */ + reply_info_sortref(rep); + if(!(e = query_info_entrysetup(qinfo, rep, hash))) { + log_err("store_msg: malloc failed"); + return; + } + slabhash_insert(env->msg_cache, hash, &e->entry, rep, env->alloc); +} + +/** find closest NS or DNAME and returns the rrset (locked) */ +static struct ub_packed_rrset_key* +find_closest_of_type(struct module_env* env, uint8_t* qname, size_t qnamelen, + uint16_t qclass, time_t now, uint16_t searchtype, int stripfront) +{ + struct ub_packed_rrset_key *rrset; + uint8_t lablen; + + if(stripfront) { + /* strip off so that DNAMEs have strict subdomain match */ + lablen = *qname; + qname += lablen + 1; + qnamelen -= lablen + 1; + } + + /* snip off front part of qname until the type is found */ + while(qnamelen > 0) { + if((rrset = rrset_cache_lookup(env->rrset_cache, qname, + qnamelen, searchtype, qclass, 0, now, 0))) + return rrset; + + /* snip off front label */ + lablen = *qname; + qname += lablen + 1; + qnamelen -= lablen + 1; + } + return NULL; +} + +/** add addr to additional section */ +static void +addr_to_additional(struct ub_packed_rrset_key* rrset, struct regional* region, + struct dns_msg* msg, time_t now) +{ + if((msg->rep->rrsets[msg->rep->rrset_count] = + packed_rrset_copy_region(rrset, region, now))) { + msg->rep->ar_numrrsets++; + msg->rep->rrset_count++; + } +} + +/** lookup message in message cache */ +static struct msgreply_entry* +msg_cache_lookup(struct module_env* env, uint8_t* qname, size_t qnamelen, + uint16_t qtype, uint16_t qclass, time_t now, int wr) +{ + struct lruhash_entry* e; + struct query_info k; + hashvalue_t h; + + k.qname = qname; + k.qname_len = 
qnamelen; + k.qtype = qtype; + k.qclass = qclass; + h = query_info_hash(&k); + e = slabhash_lookup(env->msg_cache, h, &k, wr); + + if(!e) return NULL; + if( now > ((struct reply_info*)e->data)->ttl ) { + lock_rw_unlock(&e->lock); + return NULL; + } + return (struct msgreply_entry*)e->key; +} + +/** find and add A and AAAA records for nameservers in delegpt */ +static int +find_add_addrs(struct module_env* env, uint16_t qclass, + struct regional* region, struct delegpt* dp, time_t now, + struct dns_msg** msg) +{ + struct delegpt_ns* ns; + struct msgreply_entry* neg; + struct ub_packed_rrset_key* akey; + for(ns = dp->nslist; ns; ns = ns->next) { + akey = rrset_cache_lookup(env->rrset_cache, ns->name, + ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0); + if(akey) { + if(!delegpt_add_rrset_A(dp, region, akey, 0)) { + lock_rw_unlock(&akey->entry.lock); + return 0; + } + if(msg) + addr_to_additional(akey, region, *msg, now); + lock_rw_unlock(&akey->entry.lock); + } else { + neg = msg_cache_lookup(env, ns->name, ns->namelen, + LDNS_RR_TYPE_A, qclass, now, 0); + if(neg) { + delegpt_add_neg_msg(dp, neg); + lock_rw_unlock(&neg->entry.lock); + } + } + akey = rrset_cache_lookup(env->rrset_cache, ns->name, + ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0); + if(akey) { + if(!delegpt_add_rrset_AAAA(dp, region, akey, 0)) { + lock_rw_unlock(&akey->entry.lock); + return 0; + } + if(msg) + addr_to_additional(akey, region, *msg, now); + lock_rw_unlock(&akey->entry.lock); + } else { + neg = msg_cache_lookup(env, ns->name, ns->namelen, + LDNS_RR_TYPE_AAAA, qclass, now, 0); + if(neg) { + delegpt_add_neg_msg(dp, neg); + lock_rw_unlock(&neg->entry.lock); + } + } + } + return 1; +} + +/** find and add A and AAAA records for missing nameservers in delegpt */ +int +cache_fill_missing(struct module_env* env, uint16_t qclass, + struct regional* region, struct delegpt* dp) +{ + struct delegpt_ns* ns; + struct msgreply_entry* neg; + struct ub_packed_rrset_key* akey; + time_t now = *env->now; + 
for(ns = dp->nslist; ns; ns = ns->next) { + akey = rrset_cache_lookup(env->rrset_cache, ns->name, + ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0); + if(akey) { + if(!delegpt_add_rrset_A(dp, region, akey, ns->lame)) { + lock_rw_unlock(&akey->entry.lock); + return 0; + } + log_nametypeclass(VERB_ALGO, "found in cache", + ns->name, LDNS_RR_TYPE_A, qclass); + lock_rw_unlock(&akey->entry.lock); + } else { + neg = msg_cache_lookup(env, ns->name, ns->namelen, + LDNS_RR_TYPE_A, qclass, now, 0); + if(neg) { + delegpt_add_neg_msg(dp, neg); + lock_rw_unlock(&neg->entry.lock); + } + } + akey = rrset_cache_lookup(env->rrset_cache, ns->name, + ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0); + if(akey) { + if(!delegpt_add_rrset_AAAA(dp, region, akey, ns->lame)) { + lock_rw_unlock(&akey->entry.lock); + return 0; + } + log_nametypeclass(VERB_ALGO, "found in cache", + ns->name, LDNS_RR_TYPE_AAAA, qclass); + lock_rw_unlock(&akey->entry.lock); + } else { + neg = msg_cache_lookup(env, ns->name, ns->namelen, + LDNS_RR_TYPE_AAAA, qclass, now, 0); + if(neg) { + delegpt_add_neg_msg(dp, neg); + lock_rw_unlock(&neg->entry.lock); + } + } + } + return 1; +} + +/** find and add DS or NSEC to delegation msg */ +static void +find_add_ds(struct module_env* env, struct regional* region, + struct dns_msg* msg, struct delegpt* dp, time_t now) +{ + /* Lookup the DS or NSEC at the delegation point. */ + struct ub_packed_rrset_key* rrset = rrset_cache_lookup( + env->rrset_cache, dp->name, dp->namelen, LDNS_RR_TYPE_DS, + msg->qinfo.qclass, 0, now, 0); + if(!rrset) { + /* NOTE: this won't work for alternate NSEC schemes + * (opt-in, NSEC3) */ + rrset = rrset_cache_lookup(env->rrset_cache, dp->name, + dp->namelen, LDNS_RR_TYPE_NSEC, msg->qinfo.qclass, + 0, now, 0); + /* Note: the PACKED_RRSET_NSEC_AT_APEX flag is not used. + * since this is a referral, we need the NSEC at the parent + * side of the zone cut, not the NSEC at apex side. 
*/ + if(rrset && nsec_has_type(rrset, LDNS_RR_TYPE_DS)) { + lock_rw_unlock(&rrset->entry.lock); + rrset = NULL; /* discard wrong NSEC */ + } + } + if(rrset) { + /* add it to auth section. This is the second rrset. */ + if((msg->rep->rrsets[msg->rep->rrset_count] = + packed_rrset_copy_region(rrset, region, now))) { + msg->rep->ns_numrrsets++; + msg->rep->rrset_count++; + } + lock_rw_unlock(&rrset->entry.lock); + } +} + +struct dns_msg* +dns_msg_create(uint8_t* qname, size_t qnamelen, uint16_t qtype, + uint16_t qclass, struct regional* region, size_t capacity) +{ + struct dns_msg* msg = (struct dns_msg*)regional_alloc(region, + sizeof(struct dns_msg)); + if(!msg) + return NULL; + msg->qinfo.qname = regional_alloc_init(region, qname, qnamelen); + if(!msg->qinfo.qname) + return NULL; + msg->qinfo.qname_len = qnamelen; + msg->qinfo.qtype = qtype; + msg->qinfo.qclass = qclass; + /* non-packed reply_info, because it needs to grow the array */ + msg->rep = (struct reply_info*)regional_alloc_zero(region, + sizeof(struct reply_info)-sizeof(struct rrset_ref)); + if(!msg->rep) + return NULL; + msg->rep->flags = BIT_QR; /* with QR, no AA */ + msg->rep->qdcount = 1; + msg->rep->rrsets = (struct ub_packed_rrset_key**) + regional_alloc(region, + capacity*sizeof(struct ub_packed_rrset_key*)); + if(!msg->rep->rrsets) + return NULL; + return msg; +} + +int +dns_msg_authadd(struct dns_msg* msg, struct regional* region, + struct ub_packed_rrset_key* rrset, time_t now) +{ + if(!(msg->rep->rrsets[msg->rep->rrset_count++] = + packed_rrset_copy_region(rrset, region, now))) + return 0; + msg->rep->ns_numrrsets++; + return 1; +} + +struct delegpt* +dns_cache_find_delegation(struct module_env* env, uint8_t* qname, + size_t qnamelen, uint16_t qtype, uint16_t qclass, + struct regional* region, struct dns_msg** msg, time_t now) +{ + /* try to find closest NS rrset */ + struct ub_packed_rrset_key* nskey; + struct packed_rrset_data* nsdata; + struct delegpt* dp; + + nskey = 
find_closest_of_type(env, qname, qnamelen, qclass, now, + LDNS_RR_TYPE_NS, 0); + if(!nskey) /* hope the caller has hints to prime or something */ + return NULL; + nsdata = (struct packed_rrset_data*)nskey->entry.data; + /* got the NS key, create delegation point */ + dp = delegpt_create(region); + if(!dp || !delegpt_set_name(dp, region, nskey->rk.dname)) { + lock_rw_unlock(&nskey->entry.lock); + log_err("find_delegation: out of memory"); + return NULL; + } + /* create referral message */ + if(msg) { + /* allocate the array to as much as we could need: + * NS rrset + DS/NSEC rrset + + * A rrset for every NS RR + * AAAA rrset for every NS RR + */ + *msg = dns_msg_create(qname, qnamelen, qtype, qclass, region, + 2 + nsdata->count*2); + if(!*msg || !dns_msg_authadd(*msg, region, nskey, now)) { + lock_rw_unlock(&nskey->entry.lock); + log_err("find_delegation: out of memory"); + return NULL; + } + } + if(!delegpt_rrset_add_ns(dp, region, nskey, 0)) + log_err("find_delegation: addns out of memory"); + lock_rw_unlock(&nskey->entry.lock); /* first unlock before next lookup*/ + /* find and add DS/NSEC (if any) */ + if(msg) + find_add_ds(env, region, *msg, dp, now); + /* find and add A entries */ + if(!find_add_addrs(env, qclass, region, dp, now, msg)) + log_err("find_delegation: addrs out of memory"); + return dp; +} + +/** allocate dns_msg from query_info and reply_info */ +static struct dns_msg* +gen_dns_msg(struct regional* region, struct query_info* q, size_t num) +{ + struct dns_msg* msg = (struct dns_msg*)regional_alloc(region, + sizeof(struct dns_msg)); + if(!msg) + return NULL; + memcpy(&msg->qinfo, q, sizeof(struct query_info)); + msg->qinfo.qname = regional_alloc_init(region, q->qname, q->qname_len); + if(!msg->qinfo.qname) + return NULL; + /* allocate replyinfo struct and rrset key array separately */ + msg->rep = (struct reply_info*)regional_alloc(region, + sizeof(struct reply_info) - sizeof(struct rrset_ref)); + if(!msg->rep) + return NULL; + msg->rep->rrsets = 
(struct ub_packed_rrset_key**) + regional_alloc(region, + num * sizeof(struct ub_packed_rrset_key*)); + if(!msg->rep->rrsets) + return NULL; + return msg; +} + +/** generate dns_msg from cached message */ +static struct dns_msg* +tomsg(struct module_env* env, struct query_info* q, struct reply_info* r, + struct regional* region, time_t now, struct regional* scratch) +{ + struct dns_msg* msg; + size_t i; + if(now > r->ttl) + return NULL; + msg = gen_dns_msg(region, q, r->rrset_count); + if(!msg) + return NULL; + msg->rep->flags = r->flags; + msg->rep->qdcount = r->qdcount; + msg->rep->ttl = r->ttl - now; + if(r->prefetch_ttl > now) + msg->rep->prefetch_ttl = r->prefetch_ttl - now; + else msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl); + msg->rep->security = r->security; + msg->rep->an_numrrsets = r->an_numrrsets; + msg->rep->ns_numrrsets = r->ns_numrrsets; + msg->rep->ar_numrrsets = r->ar_numrrsets; + msg->rep->rrset_count = r->rrset_count; + msg->rep->authoritative = r->authoritative; + if(!rrset_array_lock(r->ref, r->rrset_count, now)) + return NULL; + if(r->an_numrrsets > 0 && (r->rrsets[0]->rk.type == htons( + LDNS_RR_TYPE_CNAME) || r->rrsets[0]->rk.type == htons( + LDNS_RR_TYPE_DNAME)) && !reply_check_cname_chain(r)) { + /* cname chain is now invalid, reconstruct msg */ + rrset_array_unlock(r->ref, r->rrset_count); + return NULL; + } + if(r->security == sec_status_secure && !reply_all_rrsets_secure(r)) { + /* message rrsets have changed status, revalidate */ + rrset_array_unlock(r->ref, r->rrset_count); + return NULL; + } + for(i=0; i<msg->rep->rrset_count; i++) { + msg->rep->rrsets[i] = packed_rrset_copy_region(r->rrsets[i], + region, now); + if(!msg->rep->rrsets[i]) { + rrset_array_unlock(r->ref, r->rrset_count); + return NULL; + } + } + rrset_array_unlock_touch(env->rrset_cache, scratch, r->ref, + r->rrset_count); + return msg; +} + +/** synthesize RRset-only response from cached RRset item */ +static struct dns_msg* +rrset_msg(struct 
ub_packed_rrset_key* rrset, struct regional* region, + time_t now, struct query_info* q) +{ + struct dns_msg* msg; + struct packed_rrset_data* d = (struct packed_rrset_data*) + rrset->entry.data; + if(now > d->ttl) + return NULL; + msg = gen_dns_msg(region, q, 1); /* only the CNAME (or other) RRset */ + if(!msg) + return NULL; + msg->rep->flags = BIT_QR; /* reply, no AA, no error */ + msg->rep->authoritative = 0; /* reply stored in cache can't be authoritative */ + msg->rep->qdcount = 1; + msg->rep->ttl = d->ttl - now; + msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl); + msg->rep->security = sec_status_unchecked; + msg->rep->an_numrrsets = 1; + msg->rep->ns_numrrsets = 0; + msg->rep->ar_numrrsets = 0; + msg->rep->rrset_count = 1; + msg->rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now); + if(!msg->rep->rrsets[0]) /* copy CNAME */ + return NULL; + return msg; +} + +/** synthesize DNAME+CNAME response from cached DNAME item */ +static struct dns_msg* +synth_dname_msg(struct ub_packed_rrset_key* rrset, struct regional* region, + time_t now, struct query_info* q) +{ + struct dns_msg* msg; + struct ub_packed_rrset_key* ck; + struct packed_rrset_data* newd, *d = (struct packed_rrset_data*) + rrset->entry.data; + uint8_t* newname, *dtarg = NULL; + size_t newlen, dtarglen; + if(now > d->ttl) + return NULL; + /* only allow validated (with DNSSEC) DNAMEs used from cache + * for insecure DNAMEs, query again. 
*/ + if(d->security != sec_status_secure) + return NULL; + msg = gen_dns_msg(region, q, 2); /* DNAME + CNAME RRset */ + if(!msg) + return NULL; + msg->rep->flags = BIT_QR; /* reply, no AA, no error */ + msg->rep->authoritative = 0; /* reply stored in cache can't be authoritative */ + msg->rep->qdcount = 1; + msg->rep->ttl = d->ttl - now; + msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl); + msg->rep->security = sec_status_unchecked; + msg->rep->an_numrrsets = 1; + msg->rep->ns_numrrsets = 0; + msg->rep->ar_numrrsets = 0; + msg->rep->rrset_count = 1; + msg->rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now); + if(!msg->rep->rrsets[0]) /* copy DNAME */ + return NULL; + /* synth CNAME rrset */ + get_cname_target(rrset, &dtarg, &dtarglen); + if(!dtarg) + return NULL; + newlen = q->qname_len + dtarglen - rrset->rk.dname_len; + if(newlen > LDNS_MAX_DOMAINLEN) { + msg->rep->flags |= LDNS_RCODE_YXDOMAIN; + return msg; + } + newname = (uint8_t*)regional_alloc(region, newlen); + if(!newname) + return NULL; + /* new name is concatenation of qname front (without DNAME owner) + * and DNAME target name */ + memcpy(newname, q->qname, q->qname_len-rrset->rk.dname_len); + memmove(newname+(q->qname_len-rrset->rk.dname_len), dtarg, dtarglen); + /* create rest of CNAME rrset */ + ck = (struct ub_packed_rrset_key*)regional_alloc(region, + sizeof(struct ub_packed_rrset_key)); + if(!ck) + return NULL; + memset(&ck->entry, 0, sizeof(ck->entry)); + msg->rep->rrsets[1] = ck; + ck->entry.key = ck; + ck->rk.type = htons(LDNS_RR_TYPE_CNAME); + ck->rk.rrset_class = rrset->rk.rrset_class; + ck->rk.flags = 0; + ck->rk.dname = regional_alloc_init(region, q->qname, q->qname_len); + if(!ck->rk.dname) + return NULL; + ck->rk.dname_len = q->qname_len; + ck->entry.hash = rrset_key_hash(&ck->rk); + newd = (struct packed_rrset_data*)regional_alloc_zero(region, + sizeof(struct packed_rrset_data) + sizeof(size_t) + + sizeof(uint8_t*) + sizeof(time_t) + sizeof(uint16_t) + + newlen); + 
if(!newd) + return NULL; + ck->entry.data = newd; + newd->ttl = 0; /* 0 for synthesized CNAME TTL */ + newd->count = 1; + newd->rrsig_count = 0; + newd->trust = rrset_trust_ans_noAA; + newd->rr_len = (size_t*)((uint8_t*)newd + + sizeof(struct packed_rrset_data)); + newd->rr_len[0] = newlen + sizeof(uint16_t); + packed_rrset_ptr_fixup(newd); + newd->rr_ttl[0] = newd->ttl; + msg->rep->ttl = newd->ttl; + msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(newd->ttl); + sldns_write_uint16(newd->rr_data[0], newlen); + memmove(newd->rr_data[0] + sizeof(uint16_t), newname, newlen); + msg->rep->an_numrrsets ++; + msg->rep->rrset_count ++; + return msg; +} + +struct dns_msg* +dns_cache_lookup(struct module_env* env, + uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, + struct regional* region, struct regional* scratch) +{ + struct lruhash_entry* e; + struct query_info k; + hashvalue_t h; + time_t now = *env->now; + struct ub_packed_rrset_key* rrset; + + /* lookup first, this has both NXdomains and ANSWER responses */ + k.qname = qname; + k.qname_len = qnamelen; + k.qtype = qtype; + k.qclass = qclass; + h = query_info_hash(&k); + e = slabhash_lookup(env->msg_cache, h, &k, 0); + if(e) { + struct msgreply_entry* key = (struct msgreply_entry*)e->key; + struct reply_info* data = (struct reply_info*)e->data; + struct dns_msg* msg = tomsg(env, &key->key, data, region, now, + scratch); + if(msg) { + lock_rw_unlock(&e->lock); + return msg; + } + /* could be msg==NULL; due to TTL or not all rrsets available */ + lock_rw_unlock(&e->lock); + } + + /* see if a DNAME exists. 
Checked for first, to enforce that DNAMEs + * are more important, the CNAME is resynthesized and thus + * consistent with the DNAME */ + if( (rrset=find_closest_of_type(env, qname, qnamelen, qclass, now, + LDNS_RR_TYPE_DNAME, 1))) { + /* synthesize a DNAME+CNAME message based on this */ + struct dns_msg* msg = synth_dname_msg(rrset, region, now, &k); + if(msg) { + lock_rw_unlock(&rrset->entry.lock); + return msg; + } + lock_rw_unlock(&rrset->entry.lock); + } + + /* see if we have CNAME for this domain, + * but not for DS records (which are part of the parent) */ + if( qtype != LDNS_RR_TYPE_DS && + (rrset=rrset_cache_lookup(env->rrset_cache, qname, qnamelen, + LDNS_RR_TYPE_CNAME, qclass, 0, now, 0))) { + struct dns_msg* msg = rrset_msg(rrset, region, now, &k); + if(msg) { + lock_rw_unlock(&rrset->entry.lock); + return msg; + } + lock_rw_unlock(&rrset->entry.lock); + } + + /* construct DS, DNSKEY, DLV messages from rrset cache. */ + if((qtype == LDNS_RR_TYPE_DS || qtype == LDNS_RR_TYPE_DNSKEY || + qtype == LDNS_RR_TYPE_DLV) && + (rrset=rrset_cache_lookup(env->rrset_cache, qname, qnamelen, + qtype, qclass, 0, now, 0))) { + /* if the rrset is from the additional section, and the + * signatures have fallen off, then do not synthesize a msg + * instead, allow a full query for signed results to happen. + * Forego all rrset data from additional section, because + * some signatures may not be present and cause validation + * failure. + */ + struct packed_rrset_data *d = (struct packed_rrset_data*) + rrset->entry.data; + if(d->trust != rrset_trust_add_noAA && + d->trust != rrset_trust_add_AA && + (qtype == LDNS_RR_TYPE_DS || + (d->trust != rrset_trust_auth_noAA + && d->trust != rrset_trust_auth_AA) )) { + struct dns_msg* msg = rrset_msg(rrset, region, now, &k); + if(msg) { + lock_rw_unlock(&rrset->entry.lock); + return msg; + } + } + lock_rw_unlock(&rrset->entry.lock); + } + + /* stop downwards cache search on NXDOMAIN. 
+ * Empty nonterminals are NOERROR, so an NXDOMAIN for foo + * means bla.foo also does not exist. The DNSSEC proofs are + * the same. We search upwards for NXDOMAINs. */ + if(env->cfg->harden_below_nxdomain) + while(!dname_is_root(k.qname)) { + dname_remove_label(&k.qname, &k.qname_len); + h = query_info_hash(&k); + e = slabhash_lookup(env->msg_cache, h, &k, 0); + if(e) { + struct reply_info* data = (struct reply_info*)e->data; + struct dns_msg* msg; + if(FLAGS_GET_RCODE(data->flags) == LDNS_RCODE_NXDOMAIN + && data->security == sec_status_secure + && (msg=tomsg(env, &k, data, region, now, scratch))){ + lock_rw_unlock(&e->lock); + msg->qinfo.qname=qname; + msg->qinfo.qname_len=qnamelen; + /* check that DNSSEC really works out */ + msg->rep->security = sec_status_unchecked; + return msg; + } + lock_rw_unlock(&e->lock); + } + } + + return NULL; +} + +int +dns_cache_store(struct module_env* env, struct query_info* msgqinf, + struct reply_info* msgrep, int is_referral, time_t leeway, int pside, + struct regional* region) +{ + struct reply_info* rep = NULL; + /* alloc, malloc properly (not in region, like msg is) */ + rep = reply_info_copy(msgrep, env->alloc, NULL); + if(!rep) + return 0; + /* ttl must be relative ;i.e. 0..86400 not time(0)+86400. + * the env->now is added to message and RRsets in this routine. */ + /* the leeway is used to invalidate other rrsets earlier */ + + if(is_referral) { + /* store rrsets */ + struct rrset_ref ref; + size_t i; + for(i=0; i<rep->rrset_count; i++) { + packed_rrset_ttl_add((struct packed_rrset_data*) + rep->rrsets[i]->entry.data, *env->now); + ref.key = rep->rrsets[i]; + ref.id = rep->rrsets[i]->id; + /*ignore ret: it was in the cache, ref updated */ + /* no leeway for typeNS */ + (void)rrset_cache_update(env->rrset_cache, &ref, + env->alloc, *env->now + + ((ntohs(ref.key->rk.type)==LDNS_RR_TYPE_NS + && !pside) ? 
0:leeway)); + } + free(rep); + return 1; + } else { + /* store msg, and rrsets */ + struct query_info qinf; + hashvalue_t h; + + qinf = *msgqinf; + qinf.qname = memdup(msgqinf->qname, msgqinf->qname_len); + if(!qinf.qname) { + reply_info_parsedelete(rep, env->alloc); + return 0; + } + /* fixup flags to be sensible for a reply based on the cache */ + /* this module means that RA is available. It is an answer QR. + * Not AA from cache. Not CD in cache (depends on client bit). */ + rep->flags |= (BIT_RA | BIT_QR); + rep->flags &= ~(BIT_AA | BIT_CD); + h = query_info_hash(&qinf); + dns_cache_store_msg(env, &qinf, h, rep, leeway, pside, msgrep, + region); + /* qname is used inside query_info_entrysetup, and set to + * NULL. If it has not been used, free it. free(0) is safe. */ + free(qinf.qname); + } + return 1; +} + +int +dns_cache_prefetch_adjust(struct module_env* env, struct query_info* qinfo, + time_t adjust) +{ + struct msgreply_entry* msg; + msg = msg_cache_lookup(env, qinfo->qname, qinfo->qname_len, + qinfo->qtype, qinfo->qclass, *env->now, 1); + if(msg) { + struct reply_info* rep = (struct reply_info*)msg->entry.data; + if(rep) { + rep->prefetch_ttl += adjust; + lock_rw_unlock(&msg->entry.lock); + return 1; + } + lock_rw_unlock(&msg->entry.lock); + } + return 0; +} diff --git a/external/unbound/services/cache/dns.h b/external/unbound/services/cache/dns.h new file mode 100644 index 000000000..05a3e6296 --- /dev/null +++ b/external/unbound/services/cache/dns.h @@ -0,0 +1,194 @@ +/* + * services/cache/dns.h - Cache services for DNS using msg and rrset caches. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains the DNS cache. + */ + +#ifndef SERVICES_CACHE_DNS_H +#define SERVICES_CACHE_DNS_H +#include "util/storage/lruhash.h" +#include "util/data/msgreply.h" +struct module_env; +struct query_info; +struct reply_info; +struct regional; +struct delegpt; + +/** + * Region allocated message reply + */ +struct dns_msg { + /** query info */ + struct query_info qinfo; + /** reply info - ptr to packed repinfo structure */ + struct reply_info *rep; +}; + +/** + * Allocate a dns_msg with malloc/alloc structure and store in dns cache. + * + * @param env: environment, with alloc structure and dns cache. + * @param qinf: query info, the query for which answer is stored. 
+ * this is allocated in a region, and will be copied to malloc area + * before insertion. + * @param rep: reply in dns_msg from dns_alloc_msg for example. + * this is allocated in a region, and will be copied to malloc area + * before insertion. + * @param is_referral: If true, then the given message to be stored is a + * referral. The cache implementation may use this as a hint. + * It will store only the RRsets, not the message. + * @param leeway: TTL value, if not 0, other rrsets are considered expired + * that many seconds before actual TTL expiry. + * @param pside: if true, information came from a server which was fetched + * from the parentside of the zonecut. This means that the type NS + * can be updated to full TTL even in prefetch situations. + * @param region: region to allocate better entries from cache into. + * (used when is_referral is false). + * @return 0 on alloc error (out of memory). + */ +int dns_cache_store(struct module_env* env, struct query_info* qinf, + struct reply_info* rep, int is_referral, time_t leeway, int pside, + struct regional* region); + +/** + * Store message in the cache. Stores in message cache and rrset cache. + * Both qinfo and rep should be malloced and are put in the cache. + * They should not be used after this call, as they are then in shared cache. + * Does not return errors, they are logged and only lead to less cache. + * + * @param env: module environment with the DNS cache. + * @param qinfo: query info + * @param hash: hash over qinfo. + * @param rep: reply info, together with qinfo makes up the message. + * Adjusts the reply info TTLs to absolute time. + * @param leeway: TTL value, if not 0, other rrsets are considered expired + * that many seconds before actual TTL expiry. + * @param pside: if true, information came from a server which was fetched + * from the parentside of the zonecut. This means that the type NS + * can be updated to full TTL even in prefetch situations. 
+ * @param qrep: message that can be altered with better rrs from cache. + * @param region: to allocate into for qmsg. + */ +void dns_cache_store_msg(struct module_env* env, struct query_info* qinfo, + hashvalue_t hash, struct reply_info* rep, time_t leeway, int pside, + struct reply_info* qrep, struct regional* region); + +/** + * Find a delegation from the cache. + * @param env: module environment with the DNS cache. + * @param qname: query name. + * @param qnamelen: length of qname. + * @param qtype: query type. + * @param qclass: query class. + * @param region: where to allocate result delegation. + * @param msg: if not NULL, delegation message is returned here, synthesized + * from the cache. + * @param timenow: the time now, for checking if TTL on cache entries is OK. + * @return new delegation or NULL on error or if not found in cache. + */ +struct delegpt* dns_cache_find_delegation(struct module_env* env, + uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, + struct regional* region, struct dns_msg** msg, time_t timenow); + +/** + * Find cached message + * @param env: module environment with the DNS cache. + * @param qname: query name. + * @param qnamelen: length of qname. + * @param qtype: query type. + * @param qclass: query class. + * @param region: where to allocate result. + * @param scratch: where to allocate temporary data. + * @return new response message (alloced in region, rrsets do not have IDs). + * or NULL on error or if not found in cache. + * TTLs are made relative to the current time. + */ +struct dns_msg* dns_cache_lookup(struct module_env* env, + uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, + struct regional* region, struct regional* scratch); + +/** + * find and add A and AAAA records for missing nameservers in delegpt + * @param env: module environment with rrset cache + * @param qclass: which class to look in. + * @param region: where to store new dp info. 
+ * @param dp: delegation point to fill missing entries. + * @return false on alloc failure. + */ +int cache_fill_missing(struct module_env* env, uint16_t qclass, + struct regional* region, struct delegpt* dp); + +/** + * Utility, create new, unpacked data structure for cache response. + * QR bit set, no AA. Query set as indicated. Space for number of rrsets. + * @param qname: query section name + * @param qnamelen: len of qname + * @param qtype: query section type + * @param qclass: query section class + * @param region: where to alloc. + * @param capacity: number of rrsets space to create in the array. + * @return new dns_msg struct or NULL on mem fail. + */ +struct dns_msg* dns_msg_create(uint8_t* qname, size_t qnamelen, uint16_t qtype, + uint16_t qclass, struct regional* region, size_t capacity); + +/** + * Add rrset to authority section in unpacked dns_msg message. Must have enough + * space left, does not grow the array. + * @param msg: msg to put it in. + * @param region: region to alloc in + * @param rrset: to add in authority section + * @param now: now. + * @return true if worked, false on fail + */ +int dns_msg_authadd(struct dns_msg* msg, struct regional* region, + struct ub_packed_rrset_key* rrset, time_t now); + +/** + * Adjust the prefetch_ttl for a cached message. This adds a value to the + * prefetch ttl - postponing the time when it will be prefetched for future + * incoming queries. + * @param env: module environment with caches and time. + * @param qinfo: query info for the query that needs adjustment. + * @param adjust: time in seconds to add to the prefetch_leeway. + * @return false if not in cache. true if added. 
+ */ +int dns_cache_prefetch_adjust(struct module_env* env, struct query_info* qinfo, + time_t adjust); + +#endif /* SERVICES_CACHE_DNS_H */ diff --git a/external/unbound/services/cache/infra.c b/external/unbound/services/cache/infra.c new file mode 100644 index 000000000..07f2103d7 --- /dev/null +++ b/external/unbound/services/cache/infra.c @@ -0,0 +1,569 @@ +/* + * services/cache/infra.c - infrastructure cache, server rtt and capabilities + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains the infrastructure cache. + */ +#include "config.h" +#include "ldns/rrdef.h" +#include "services/cache/infra.h" +#include "util/storage/slabhash.h" +#include "util/storage/lookup3.h" +#include "util/data/dname.h" +#include "util/log.h" +#include "util/net_help.h" +#include "util/config_file.h" +#include "iterator/iterator.h" + +/** Timeout when only a single probe query per IP is allowed. */ +#define PROBE_MAXRTO 12000 /* in msec */ + +/** number of timeouts for a type when the domain can be blocked ; + * even if another type has completely rtt maxed it, the different type + * can do this number of packets (until those all timeout too) */ +#define TIMEOUT_COUNT_MAX 3 + +size_t +infra_sizefunc(void* k, void* ATTR_UNUSED(d)) +{ + struct infra_key* key = (struct infra_key*)k; + return sizeof(*key) + sizeof(struct infra_data) + key->namelen + + lock_get_mem(&key->entry.lock); +} + +int +infra_compfunc(void* key1, void* key2) +{ + struct infra_key* k1 = (struct infra_key*)key1; + struct infra_key* k2 = (struct infra_key*)key2; + int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen); + if(r != 0) + return r; + if(k1->namelen != k2->namelen) { + if(k1->namelen < k2->namelen) + return -1; + return 1; + } + return query_dname_compare(k1->zonename, k2->zonename); +} + +void +infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg)) +{ + struct infra_key* key = (struct infra_key*)k; + 
if(!key) + return; + lock_rw_destroy(&key->entry.lock); + free(key->zonename); + free(key); +} + +void +infra_deldatafunc(void* d, void* ATTR_UNUSED(arg)) +{ + struct infra_data* data = (struct infra_data*)d; + free(data); +} + +/** + * Allocate the infra cache and its host table, sized from cfg. + * cfg is dereferenced unconditionally and must not be NULL. + * Returns NULL if either allocation fails. + */ +struct infra_cache* +infra_create(struct config_file* cfg) +{ + struct infra_cache* infra = (struct infra_cache*)calloc(1, + sizeof(struct infra_cache)); + size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ + sizeof(struct infra_data)+INFRA_BYTES_NAME); + /* calloc can fail; do not dereference a NULL cache below */ + if(!infra) + return NULL; + infra->hosts = slabhash_create(cfg->infra_cache_slabs, + INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc, + &infra_delkeyfunc, &infra_deldatafunc, NULL); + if(!infra->hosts) { + free(infra); + return NULL; + } + infra->host_ttl = cfg->host_ttl; + return infra; +} + +void +infra_delete(struct infra_cache* infra) +{ + if(!infra) + return; + slabhash_delete(infra->hosts); + free(infra); +} + +struct infra_cache* +infra_adjust(struct infra_cache* infra, struct config_file* cfg) +{ + size_t maxmem; + if(!infra) + return infra_create(cfg); + infra->host_ttl = cfg->host_ttl; + maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ + sizeof(struct infra_data)+INFRA_BYTES_NAME); + if(maxmem != slabhash_get_size(infra->hosts) || + cfg->infra_cache_slabs != infra->hosts->size) { + infra_delete(infra); + infra = infra_create(cfg); + } + return infra; +} + +/** calculate the hash value for a host key */ +static hashvalue_t +hash_addr(struct sockaddr_storage* addr, socklen_t addrlen) +{ + hashvalue_t h = 0xab; + /* select the pieces to hash, some OS have changing data inside */ + if(addr_is_ip6(addr, addrlen)) { + struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr; + h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h); + h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h); + h = hashlittle(&in6->sin6_addr, INET6_SIZE, h); + } else { + struct sockaddr_in* in = (struct sockaddr_in*)addr; + h = hashlittle(&in->sin_family, 
sizeof(in->sin_family), h); + h = hashlittle(&in->sin_port, sizeof(in->sin_port), h); + h = hashlittle(&in->sin_addr, INET_SIZE, h); + } + return h; +} + +/** calculate infra hash for a key */ +static hashvalue_t +hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name) +{ + return dname_query_hash(name, hash_addr(addr, addrlen)); +} + +/** lookup version that does not check host ttl (you check it) */ +struct lruhash_entry* +infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* name, size_t namelen, int wr) +{ + struct infra_key k; + k.addrlen = addrlen; + memcpy(&k.addr, addr, addrlen); + k.namelen = namelen; + k.zonename = name; + k.entry.hash = hash_infra(addr, addrlen, name); + k.entry.key = (void*)&k; + k.entry.data = NULL; + return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr); +} + +/** init the data elements */ +static void +data_entry_init(struct infra_cache* infra, struct lruhash_entry* e, + time_t timenow) +{ + struct infra_data* data = (struct infra_data*)e->data; + data->ttl = timenow + infra->host_ttl; + rtt_init(&data->rtt); + data->edns_version = 0; + data->edns_lame_known = 0; + data->probedelay = 0; + data->isdnsseclame = 0; + data->rec_lame = 0; + data->lame_type_A = 0; + data->lame_other = 0; + data->timeout_A = 0; + data->timeout_AAAA = 0; + data->timeout_other = 0; +} + +/** + * Create and init a new entry for a host + * @param infra: infra structure with config parameters. + * @param addr: host address. + * @param addrlen: length of addr. + * @param name: name of zone + * @param namelen: length of name. + * @param tm: time now. + * @return: the new entry or NULL on malloc failure. 
+ */ +static struct lruhash_entry* +new_entry(struct infra_cache* infra, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm) +{ + struct infra_data* data; + struct infra_key* key = (struct infra_key*)malloc(sizeof(*key)); + if(!key) + return NULL; + data = (struct infra_data*)malloc(sizeof(struct infra_data)); + if(!data) { + free(key); + return NULL; + } + key->zonename = memdup(name, namelen); + if(!key->zonename) { + free(key); + free(data); + return NULL; + } + key->namelen = namelen; + lock_rw_init(&key->entry.lock); + key->entry.hash = hash_infra(addr, addrlen, name); + key->entry.key = (void*)key; + key->entry.data = (void*)data; + key->addrlen = addrlen; + memcpy(&key->addr, addr, addrlen); + data_entry_init(infra, &key->entry, tm); + return &key->entry; +} + +int +infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow, + int* edns_vs, uint8_t* edns_lame_known, int* to) +{ + struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, + nm, nmlen, 0); + struct infra_data* data; + int wr = 0; + if(e && ((struct infra_data*)e->data)->ttl < timenow) { + /* it expired, try to reuse existing entry */ + int old = ((struct infra_data*)e->data)->rtt.rto; + uint8_t tA = ((struct infra_data*)e->data)->timeout_A; + uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA; + uint8_t tother = ((struct infra_data*)e->data)->timeout_other; + lock_rw_unlock(&e->lock); + e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1); + if(e) { + /* if its still there we have a writelock, init */ + /* re-initialise */ + /* do not touch lameness, it may be valid still */ + data_entry_init(infra, e, timenow); + wr = 1; + /* TOP_TIMEOUT remains on reuse */ + if(old >= USEFUL_SERVER_TOP_TIMEOUT) { + ((struct infra_data*)e->data)->rtt.rto + = USEFUL_SERVER_TOP_TIMEOUT; + ((struct infra_data*)e->data)->timeout_A = tA; + ((struct 
infra_data*)e->data)->timeout_AAAA = tAAAA; + ((struct infra_data*)e->data)->timeout_other = tother; + } + } + } + if(!e) { + /* insert new entry */ + if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) + return 0; + data = (struct infra_data*)e->data; + *edns_vs = data->edns_version; + *edns_lame_known = data->edns_lame_known; + *to = rtt_timeout(&data->rtt); + slabhash_insert(infra->hosts, e->hash, e, data, NULL); + return 1; + } + /* use existing entry */ + data = (struct infra_data*)e->data; + *edns_vs = data->edns_version; + *edns_lame_known = data->edns_lame_known; + *to = rtt_timeout(&data->rtt); + if(*to >= PROBE_MAXRTO && rtt_notimeout(&data->rtt)*4 <= *to) { + /* delay other queries, this is the probe query */ + if(!wr) { + lock_rw_unlock(&e->lock); + e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1); + if(!e) { /* flushed from cache real fast, no use to + allocate just for the probedelay */ + return 1; + } + data = (struct infra_data*)e->data; + } + /* add 999 to round up the timeout value from msec to sec, + * then add a whole second so it is certain that this probe + * has timed out before the next is allowed */ + data->probedelay = timenow + ((*to)+1999)/1000; + } + lock_rw_unlock(&e->lock); + return 1; +} + +int +infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow, + int dnsseclame, int reclame, uint16_t qtype) +{ + struct infra_data* data; + struct lruhash_entry* e; + int needtoinsert = 0; + e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1); + if(!e) { + /* insert it */ + if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) { + log_err("set_lame: malloc failure"); + return 0; + } + needtoinsert = 1; + } else if( ((struct infra_data*)e->data)->ttl < timenow) { + /* expired, reuse existing entry */ + data_entry_init(infra, e, timenow); + } + /* got an entry, now set the zone lame */ + data = (struct infra_data*)e->data; + /* merge 
data (if any) */ + if(dnsseclame) + data->isdnsseclame = 1; + if(reclame) + data->rec_lame = 1; + if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A) + data->lame_type_A = 1; + if(!dnsseclame && !reclame && qtype != LDNS_RR_TYPE_A) + data->lame_other = 1; + /* done */ + if(needtoinsert) + slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); + else { lock_rw_unlock(&e->lock); } + return 1; +} + +void +infra_update_tcp_works(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm, + size_t nmlen) +{ + struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, + nm, nmlen, 1); + struct infra_data* data; + if(!e) + return; /* doesn't exist */ + data = (struct infra_data*)e->data; + if(data->rtt.rto >= RTT_MAX_TIMEOUT) + /* do not disqualify this server altogether, it is better + * than nothing */ + data->rtt.rto = RTT_MAX_TIMEOUT-1000; + lock_rw_unlock(&e->lock); +} + +int +infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype, + int roundtrip, int orig_rtt, time_t timenow) +{ + struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, + nm, nmlen, 1); + struct infra_data* data; + int needtoinsert = 0; + int rto = 1; + if(!e) { + if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) + return 0; + needtoinsert = 1; + } else if(((struct infra_data*)e->data)->ttl < timenow) { + data_entry_init(infra, e, timenow); + } + /* have an entry, update the rtt */ + data = (struct infra_data*)e->data; + if(roundtrip == -1) { + rtt_lost(&data->rtt, orig_rtt); + if(qtype == LDNS_RR_TYPE_A) { + if(data->timeout_A < TIMEOUT_COUNT_MAX) + data->timeout_A++; + } else if(qtype == LDNS_RR_TYPE_AAAA) { + if(data->timeout_AAAA < TIMEOUT_COUNT_MAX) + data->timeout_AAAA++; + } else { + if(data->timeout_other < TIMEOUT_COUNT_MAX) + data->timeout_other++; + } + } else { + /* if we got a reply, but the old timeout was above server + * selection height, 
delete the timeout so the server is + * fully available again */ + if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT) + rtt_init(&data->rtt); + rtt_update(&data->rtt, roundtrip); + data->probedelay = 0; + if(qtype == LDNS_RR_TYPE_A) + data->timeout_A = 0; + else if(qtype == LDNS_RR_TYPE_AAAA) + data->timeout_AAAA = 0; + else data->timeout_other = 0; + } + if(data->rtt.rto > 0) + rto = data->rtt.rto; + + if(needtoinsert) + slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); + else { lock_rw_unlock(&e->lock); } + return rto; +} + +long long infra_get_host_rto(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm, + size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow, + int* tA, int* tAAAA, int* tother) +{ + struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, + nm, nmlen, 0); + struct infra_data* data; + long long ttl = -2; + if(!e) return -1; + data = (struct infra_data*)e->data; + if(data->ttl >= timenow) { + ttl = (long long)(data->ttl - timenow); + memmove(rtt, &data->rtt, sizeof(*rtt)); + if(timenow < data->probedelay) + *delay = (int)(data->probedelay - timenow); + else *delay = 0; + } + *tA = (int)data->timeout_A; + *tAAAA = (int)data->timeout_AAAA; + *tother = (int)data->timeout_other; + lock_rw_unlock(&e->lock); + return ttl; +} + +int +infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version, + time_t timenow) +{ + struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, + nm, nmlen, 1); + struct infra_data* data; + int needtoinsert = 0; + if(!e) { + if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) + return 0; + needtoinsert = 1; + } else if(((struct infra_data*)e->data)->ttl < timenow) { + data_entry_init(infra, e, timenow); + } + /* have an entry, update the rtt, and the ttl */ + data = (struct infra_data*)e->data; + /* do not update if noEDNS and stored is yesEDNS */ + 
if(!(edns_version == -1 && (data->edns_version != -1 && + data->edns_lame_known))) { + data->edns_version = edns_version; + data->edns_lame_known = 1; + } + + if(needtoinsert) + slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); + else { lock_rw_unlock(&e->lock); } + return 1; +} + +int +infra_get_lame_rtt(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, + uint8_t* name, size_t namelen, uint16_t qtype, + int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow) +{ + struct infra_data* host; + struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, + name, namelen, 0); + if(!e) + return 0; + host = (struct infra_data*)e->data; + *rtt = rtt_unclamped(&host->rtt); + if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay + && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) { + /* single probe for this domain, and we are not probing */ + /* unless the query type allows a probe to happen */ + if(qtype == LDNS_RR_TYPE_A) { + if(host->timeout_A >= TIMEOUT_COUNT_MAX) + *rtt = USEFUL_SERVER_TOP_TIMEOUT; + else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; + } else if(qtype == LDNS_RR_TYPE_AAAA) { + if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX) + *rtt = USEFUL_SERVER_TOP_TIMEOUT; + else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; + } else { + if(host->timeout_other >= TIMEOUT_COUNT_MAX) + *rtt = USEFUL_SERVER_TOP_TIMEOUT; + else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; + } + } + if(timenow > host->ttl) { + /* expired entry */ + /* see if this can be a re-probe of an unresponsive server */ + /* minus 1000 because that is outside of the RTTBAND, so + * blacklisted servers stay blacklisted if this is chosen */ + if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) { + lock_rw_unlock(&e->lock); + *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; + *lame = 0; + *dnsseclame = 0; + *reclame = 0; + return 1; + } + lock_rw_unlock(&e->lock); + return 0; + } + /* check lameness first */ + if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) { + 
lock_rw_unlock(&e->lock); + *lame = 1; + *dnsseclame = 0; + *reclame = 0; + return 1; + } else if(host->lame_other && qtype != LDNS_RR_TYPE_A) { + lock_rw_unlock(&e->lock); + *lame = 1; + *dnsseclame = 0; + *reclame = 0; + return 1; + } else if(host->isdnsseclame) { + lock_rw_unlock(&e->lock); + *lame = 0; + *dnsseclame = 1; + *reclame = 0; + return 1; + } else if(host->rec_lame) { + lock_rw_unlock(&e->lock); + *lame = 0; + *dnsseclame = 0; + *reclame = 1; + return 1; + } + /* no lameness for this type of query */ + lock_rw_unlock(&e->lock); + *lame = 0; + *dnsseclame = 0; + *reclame = 0; + return 1; +} + +size_t +infra_get_mem(struct infra_cache* infra) +{ + return sizeof(*infra) + slabhash_get_mem(infra->hosts); +} diff --git a/external/unbound/services/cache/infra.h b/external/unbound/services/cache/infra.h new file mode 100644 index 000000000..fc54f7f0d --- /dev/null +++ b/external/unbound/services/cache/infra.h @@ -0,0 +1,309 @@ +/* + * services/cache/infra.h - infrastructure cache, server rtt and capabilities + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains the infrastructure cache. + */ + +#ifndef SERVICES_CACHE_INFRA_H +#define SERVICES_CACHE_INFRA_H +#include "util/storage/lruhash.h" +#include "util/rtt.h" +struct slabhash; +struct config_file; + +/** + * Host information kept for every server, per zone. + */ +struct infra_key { + /** the host address. */ + struct sockaddr_storage addr; + /** length of addr. */ + socklen_t addrlen; + /** zone name in wireformat */ + uint8_t* zonename; + /** length of zonename */ + size_t namelen; + /** hash table entry, data of type infra_data. */ + struct lruhash_entry entry; +}; + +/** + * Host information encompasses host capabilities and retransmission timeouts. + * And lameness information (notAuthoritative, noEDNS, Recursive) + */ +struct infra_data { + /** TTL value for this entry. absolute time. */ + time_t ttl; + + /** time in seconds (absolute) when probing re-commences, 0 disabled */ + time_t probedelay; + /** round trip times for timeout calculation */ + struct rtt_info rtt; + + /** edns version that the host supports, -1 means no EDNS */ + int edns_version; + /** if the EDNS lameness is already known or not. 
+ * EDNS lame is when EDNS queries or replies are dropped, + * and cause a timeout */ + uint8_t edns_lame_known; + + /** is the host lame (does not serve the zone authoritatively), + * or is the host dnssec lame (does not serve DNSSEC data) */ + uint8_t isdnsseclame; + /** is the host recursion lame (not AA, but RA) */ + uint8_t rec_lame; + /** the host is lame (not authoritative) for A records */ + uint8_t lame_type_A; + /** the host is lame (not authoritative) for other query types */ + uint8_t lame_other; + + /** timeouts counter for type A */ + uint8_t timeout_A; + /** timeouts counter for type AAAA */ + uint8_t timeout_AAAA; + /** timeouts counter for others */ + uint8_t timeout_other; +}; + +/** + * Infra cache + */ +struct infra_cache { + /** The hash table with hosts */ + struct slabhash* hosts; + /** TTL value for host information, in seconds */ + int host_ttl; +}; + +/** infra host cache default hash lookup size */ +#define INFRA_HOST_STARTSIZE 32 +/** bytes per zonename reserved in the hostcache, dnamelen(zonename.com.) */ +#define INFRA_BYTES_NAME 14 + +/** + * Create infra cache. + * @param cfg: config parameters; must not be NULL (it is dereferenced). + * @return: new infra cache, or NULL. + */ +struct infra_cache* infra_create(struct config_file* cfg); + +/** + * Delete infra cache. + * @param infra: infrastructure cache to delete. + */ +void infra_delete(struct infra_cache* infra); + +/** + * Adjust infra cache to use updated configuration settings. + * This may clean the cache. Operates a bit like realloc. + * There may be no threading or use by other threads. + * @param infra: existing cache. If NULL a new infra cache is returned. + * @param cfg: config options. + * @return the new infra cache pointer or NULL on error. + */ +struct infra_cache* infra_adjust(struct infra_cache* infra, + struct config_file* cfg); + +/** + * Plain find infra data function (used by the other functions) + * @param infra: infrastructure cache. + * @param addr: host address. 
+ * @param addrlen: length of addr. + * @param name: domain name of zone. + * @param namelen: length of domain name. + * @param wr: if true, writelock, else readlock. + * @return the entry, could be expired (this is not checked) or NULL. + */ +struct lruhash_entry* infra_lookup_nottl(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name, + size_t namelen, int wr); + +/** + * Find host information to send a packet. Creates new entry if not found. + * Lameness is empty. EDNS is 0 (try with first), and rtt is returned for + * the first message to it. + * Use this to send a packet only, because it also locks out others when + * probing is restricted. + * @param infra: infrastructure cache. + * @param addr: host address. + * @param addrlen: length of addr. + * @param name: domain name of zone. + * @param namelen: length of domain name. + * @param timenow: what time it is now. + * @param edns_vs: edns version it supports, is returned. + * @param edns_lame_known: if EDNS lame (EDNS is dropped in transit) has + * already been probed, is returned. + * @param to: timeout to use, is returned. + * @return: 0 on error. + */ +int infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* name, size_t namelen, + time_t timenow, int* edns_vs, uint8_t* edns_lame_known, int* to); + +/** + * Set a host to be lame for the given zone. + * @param infra: infrastructure cache. + * @param addr: host address. + * @param addrlen: length of addr. + * @param name: domain name of zone apex. + * @param namelen: length of domain name. + * @param timenow: what time it is now. + * @param dnsseclame: if true the host is set dnssec lame. + * if false, the host is marked lame (not serving the zone). + * @param reclame: if true host is a recursor not AA server. + * if false, dnsseclame or marked lame. + * @param qtype: the query type for which it is lame. + * @return: 0 on error. 
+ */ +int infra_set_lame(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, + uint8_t* name, size_t namelen, time_t timenow, int dnsseclame, + int reclame, uint16_t qtype); + +/** + * Update rtt information for the host. + * @param infra: infrastructure cache. + * @param addr: host address. + * @param addrlen: length of addr. + * @param name: zone name + * @param namelen: zone name length + * @param qtype: query type. + * @param roundtrip: estimate of roundtrip time in milliseconds or -1 for + * timeout. + * @param orig_rtt: original rtt for the query that timed out (roundtrip==-1). + * ignored if roundtrip != -1. + * @param timenow: what time it is now. + * @return: 0 on error. new rto otherwise. + */ +int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* name, size_t namelen, int qtype, + int roundtrip, int orig_rtt, time_t timenow); + +/** + * Update information for the host, store that a TCP transaction works. + * @param infra: infrastructure cache. + * @param addr: host address. + * @param addrlen: length of addr. + * @param name: name of zone + * @param namelen: length of name + */ +void infra_update_tcp_works(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, + uint8_t* name, size_t namelen); + +/** + * Update edns information for the host. + * @param infra: infrastructure cache. + * @param addr: host address. + * @param addrlen: length of addr. + * @param name: name of zone + * @param namelen: length of name + * @param edns_version: the version that it publishes. + * If it is known to support EDNS then no-EDNS is not stored over it. + * @param timenow: what time it is now. + * @return: 0 on error. + */ +int infra_edns_update(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, + uint8_t* name, size_t namelen, int edns_version, time_t timenow); + +/** + * Get Lameness information and average RTT if host is in the cache. 
+ * This information is to be used for server selection. + * @param infra: infrastructure cache. + * @param addr: host address. + * @param addrlen: length of addr. + * @param name: zone name. + * @param namelen: zone name length. + * @param qtype: the query to be made. + * @param lame: if function returns true, this returns lameness of the zone. + * @param dnsseclame: if function returns true, this returns if the zone + * is dnssec-lame. + * @param reclame: if function returns true, this is if it is recursion lame. + * @param rtt: if function returns true, this returns avg rtt of the server. + * The rtt value is unclamped and reflects recent timeouts. + * @param timenow: what time it is now. + * @return if found in cache, or false if not (or TTL bad). + */ +int infra_get_lame_rtt(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, + uint8_t* name, size_t namelen, uint16_t qtype, + int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow); + +/** + * Get additional (debug) info on timing. + * @param infra: infra cache. + * @param addr: host address. + * @param addrlen: length of addr. + * @param name: zone name + * @param namelen: zone name length + * @param rtt: the rtt_info is copied into here (caller alloced return struct). + * @param delay: probe delay (if any). + * @param timenow: what time it is now. + * @param tA: timeout counter on type A. + * @param tAAAA: timeout counter on type AAAA. + * @param tother: timeout counter on type other. + * @return TTL the infra host element is valid for. If -1: not found in cache. + * TTL -2: found but expired. + */ +long long infra_get_host_rto(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name, + size_t namelen, struct rtt_info* rtt, int* delay, time_t timenow, + int* tA, int* tAAAA, int* tother); + +/** + * Get memory used by the infra cache. + * @param infra: infrastructure cache. + * @return memory in use in bytes. 
+ */ +size_t infra_get_mem(struct infra_cache* infra); + +/** calculate size for the hashtable, does not count size of lameness, + * so the hashtable is a fixed number of items */ +size_t infra_sizefunc(void* k, void* d); + +/** compare two addresses, returns -1, 0, or +1 */ +int infra_compfunc(void* key1, void* key2); + +/** delete key, and destroy the lock */ +void infra_delkeyfunc(void* k, void* arg); + +/** delete data and destroy the lameness hashtable */ +void infra_deldatafunc(void* d, void* arg); + +#endif /* SERVICES_CACHE_INFRA_H */ diff --git a/external/unbound/services/cache/rrset.c b/external/unbound/services/cache/rrset.c new file mode 100644 index 000000000..5f52dbce1 --- /dev/null +++ b/external/unbound/services/cache/rrset.c @@ -0,0 +1,417 @@ +/* + * services/cache/rrset.c - Resource record set cache. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains the rrset cache. + */ +#include "config.h" +#include "services/cache/rrset.h" +#include "ldns/rrdef.h" +#include "util/storage/slabhash.h" +#include "util/config_file.h" +#include "util/data/packed_rrset.h" +#include "util/data/msgreply.h" +#include "util/regional.h" +#include "util/alloc.h" + +void +rrset_markdel(void* key) +{ + struct ub_packed_rrset_key* r = (struct ub_packed_rrset_key*)key; + r->id = 0; +} + +struct rrset_cache* rrset_cache_create(struct config_file* cfg, + struct alloc_cache* alloc) +{ + size_t slabs = (cfg?cfg->rrset_cache_slabs:HASH_DEFAULT_SLABS); + size_t startarray = HASH_DEFAULT_STARTARRAY; + size_t maxmem = (cfg?cfg->rrset_cache_size:HASH_DEFAULT_MAXMEM); + + struct rrset_cache *r = (struct rrset_cache*)slabhash_create(slabs, + startarray, maxmem, ub_rrset_sizefunc, ub_rrset_compare, + ub_rrset_key_delete, rrset_data_delete, alloc); + slabhash_setmarkdel(&r->table, &rrset_markdel); + return r; +} + +void rrset_cache_delete(struct rrset_cache* r) +{ + if(!r) + return; + slabhash_delete(&r->table); + /* slabhash delete also does free(r), since table is first in struct*/ +} + +struct rrset_cache* rrset_cache_adjust(struct rrset_cache *r, + struct config_file* cfg, struct alloc_cache* alloc) +{ + if(!r || !cfg || cfg->rrset_cache_slabs != r->table.size || + cfg->rrset_cache_size != slabhash_get_size(&r->table)) + { + rrset_cache_delete(r); + r = 
rrset_cache_create(cfg, alloc); + } + return r; +} + +void +rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key, + hashvalue_t hash, rrset_id_t id) +{ + struct lruhash* table = slabhash_gettable(&r->table, hash); + /* + * This leads to locking problems, deadlocks, if the caller is + * holding any other rrset lock. + * Because a lookup through the hashtable does: + * tablelock -> entrylock (for that entry caller holds) + * And this would do + * entrylock(already held) -> tablelock + * And if two threads do this, it results in deadlock. + * So, the caller must not hold entrylock. + */ + lock_quick_lock(&table->lock); + /* we have locked the hash table, the item can still be deleted. + * because it could already have been reclaimed, but not yet set id=0. + * This is because some lruhash routines have lazy deletion. + * so, we must acquire a lock on the item to verify the id != 0. + * also, with hash not changed, we are using the right slab. + */ + lock_rw_rdlock(&key->entry.lock); + if(key->id == id && key->entry.hash == hash) { + lru_touch(table, &key->entry); + } + lock_rw_unlock(&key->entry.lock); + lock_quick_unlock(&table->lock); +} + +/** see if rrset needs to be updated in the cache */ +static int +need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns) +{ + struct packed_rrset_data* newd = (struct packed_rrset_data*)nd; + struct packed_rrset_data* cached = (struct packed_rrset_data*)cd; + /* o store if rrset has been validated + * everything better than bogus data + * secure is preferred */ + if( newd->security == sec_status_secure && + cached->security != sec_status_secure) + return 1; + if( cached->security == sec_status_bogus && + newd->security != sec_status_bogus && !equal) + return 1; + /* o if current RRset is more trustworthy - insert it */ + if( newd->trust > cached->trust ) { + /* if the cached rrset is bogus, and this one equal, + * do not update the TTL - let it expire. 
*/ + if(equal && cached->ttl >= timenow && + cached->security == sec_status_bogus) + return 0; + return 1; + } + /* o item in cache has expired */ + if( cached->ttl < timenow ) + return 1; + /* o same trust, but different in data - insert it */ + if( newd->trust == cached->trust && !equal ) { + /* if this is type NS, do not 'stick' to owner that changes + * the NS RRset, but use the old TTL for the new data, and + * update to fetch the latest data. ttl is not expired, because + * that check was before this one. */ + if(ns) { + size_t i; + newd->ttl = cached->ttl; + for(i=0; i<(newd->count+newd->rrsig_count); i++) + if(newd->rr_ttl[i] > newd->ttl) + newd->rr_ttl[i] = newd->ttl; + } + return 1; + } + return 0; +} + +/** Update RRSet special key ID */ +static void +rrset_update_id(struct rrset_ref* ref, struct alloc_cache* alloc) +{ + /* this may clear the cache and invalidate lock below */ + uint64_t newid = alloc_get_id(alloc); + /* obtain writelock */ + lock_rw_wrlock(&ref->key->entry.lock); + /* check if it was deleted in the meantime, if so, skip update */ + if(ref->key->id == ref->id) { + ref->key->id = newid; + ref->id = newid; + } + lock_rw_unlock(&ref->key->entry.lock); +} + +int +rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref, + struct alloc_cache* alloc, time_t timenow) +{ + struct lruhash_entry* e; + struct ub_packed_rrset_key* k = ref->key; + hashvalue_t h = k->entry.hash; + uint16_t rrset_type = ntohs(k->rk.type); + int equal = 0; + log_assert(ref->id != 0 && k->id != 0); + /* looks up item with a readlock - no editing! */ + if((e=slabhash_lookup(&r->table, h, k, 0)) != 0) { + /* return id and key as they will be used in the cache + * since the lruhash_insert, if item already exists, deallocs + * the passed key in favor of the already stored key. + * because of the small gap (see below) this key ptr and id + * may prove later to be already deleted, which is no problem + * as it only makes a cache miss. 
+ */ + ref->key = (struct ub_packed_rrset_key*)e->key; + ref->id = ref->key->id; + equal = rrsetdata_equal((struct packed_rrset_data*)k->entry. + data, (struct packed_rrset_data*)e->data); + if(!need_to_update_rrset(k->entry.data, e->data, timenow, + equal, (rrset_type==LDNS_RR_TYPE_NS))) { + /* cache is superior, return that value */ + lock_rw_unlock(&e->lock); + ub_packed_rrset_parsedelete(k, alloc); + if(equal) return 2; + return 1; + } + lock_rw_unlock(&e->lock); + /* Go on and insert the passed item. + * small gap here, where entry is not locked. + * possibly entry is updated with something else. + * we then overwrite that with our data. + * this is just too bad, its cache anyway. */ + /* use insert to update entry to manage lruhash + * cache size values nicely. */ + } + log_assert(ref->key->id != 0); + slabhash_insert(&r->table, h, &k->entry, k->entry.data, alloc); + if(e) { + /* For NSEC, NSEC3, DNAME, when rdata is updated, update + * the ID number so that proofs in message cache are + * invalidated */ + if((rrset_type == LDNS_RR_TYPE_NSEC + || rrset_type == LDNS_RR_TYPE_NSEC3 + || rrset_type == LDNS_RR_TYPE_DNAME) && !equal) { + rrset_update_id(ref, alloc); + } + return 1; + } + return 0; +} + +struct ub_packed_rrset_key* +rrset_cache_lookup(struct rrset_cache* r, uint8_t* qname, size_t qnamelen, + uint16_t qtype, uint16_t qclass, uint32_t flags, time_t timenow, + int wr) +{ + struct lruhash_entry* e; + struct ub_packed_rrset_key key; + + key.entry.key = &key; + key.entry.data = NULL; + key.rk.dname = qname; + key.rk.dname_len = qnamelen; + key.rk.type = htons(qtype); + key.rk.rrset_class = htons(qclass); + key.rk.flags = flags; + + key.entry.hash = rrset_key_hash(&key.rk); + + if((e = slabhash_lookup(&r->table, key.entry.hash, &key, wr))) { + /* check TTL */ + struct packed_rrset_data* data = + (struct packed_rrset_data*)e->data; + if(timenow > data->ttl) { + lock_rw_unlock(&e->lock); + return NULL; + } + /* we're done */ + return (struct 
ub_packed_rrset_key*)e->key; + } + return NULL; +} + +int +rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow) +{ + size_t i; + for(i=0; i<count; i++) { + if(i>0 && ref[i].key == ref[i-1].key) + continue; /* only lock items once */ + lock_rw_rdlock(&ref[i].key->entry.lock); + if(ref[i].id != ref[i].key->id || timenow > + ((struct packed_rrset_data*)(ref[i].key->entry.data)) + ->ttl) { + /* failure! rollback our readlocks */ + rrset_array_unlock(ref, i+1); + return 0; + } + } + return 1; +} + +void +rrset_array_unlock(struct rrset_ref* ref, size_t count) +{ + size_t i; + for(i=0; i<count; i++) { + if(i>0 && ref[i].key == ref[i-1].key) + continue; /* only unlock items once */ + lock_rw_unlock(&ref[i].key->entry.lock); + } +} + +void +rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch, + struct rrset_ref* ref, size_t count) +{ + hashvalue_t* h; + size_t i; + if(!(h = (hashvalue_t*)regional_alloc(scratch, + sizeof(hashvalue_t)*count))) + log_warn("rrset LRU: memory allocation failed"); + else /* store hash values */ + for(i=0; i<count; i++) + h[i] = ref[i].key->entry.hash; + /* unlock */ + for(i=0; i<count; i++) { + if(i>0 && ref[i].key == ref[i-1].key) + continue; /* only unlock items once */ + lock_rw_unlock(&ref[i].key->entry.lock); + } + if(h) { + /* LRU touch, with no rrset locks held */ + for(i=0; i<count; i++) { + if(i>0 && ref[i].key == ref[i-1].key) + continue; /* only touch items once */ + rrset_cache_touch(r, ref[i].key, h[i], ref[i].id); + } + } +} + +void +rrset_update_sec_status(struct rrset_cache* r, + struct ub_packed_rrset_key* rrset, time_t now) +{ + struct packed_rrset_data* updata = + (struct packed_rrset_data*)rrset->entry.data; + struct lruhash_entry* e; + struct packed_rrset_data* cachedata; + + /* hash it again to make sure it has a hash */ + rrset->entry.hash = rrset_key_hash(&rrset->rk); + + e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 1); + if(!e) + return; /* not in the cache anymore */ + 
cachedata = (struct packed_rrset_data*)e->data; + if(!rrsetdata_equal(updata, cachedata)) { + lock_rw_unlock(&e->lock); + return; /* rrset has changed in the meantime */ + } + /* update the cached rrset */ + if(updata->security > cachedata->security) { + size_t i; + if(updata->trust > cachedata->trust) + cachedata->trust = updata->trust; + cachedata->security = updata->security; + /* for NS records only shorter TTLs, other types: update it */ + if(ntohs(rrset->rk.type) != LDNS_RR_TYPE_NS || + updata->ttl+now < cachedata->ttl || + cachedata->ttl < now || + updata->security == sec_status_bogus) { + cachedata->ttl = updata->ttl + now; + for(i=0; i<cachedata->count+cachedata->rrsig_count; i++) + cachedata->rr_ttl[i] = updata->rr_ttl[i]+now; + } + } + lock_rw_unlock(&e->lock); +} + +void +rrset_check_sec_status(struct rrset_cache* r, + struct ub_packed_rrset_key* rrset, time_t now) +{ + struct packed_rrset_data* updata = + (struct packed_rrset_data*)rrset->entry.data; + struct lruhash_entry* e; + struct packed_rrset_data* cachedata; + + /* hash it again to make sure it has a hash */ + rrset->entry.hash = rrset_key_hash(&rrset->rk); + + e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 0); + if(!e) + return; /* not in the cache anymore */ + cachedata = (struct packed_rrset_data*)e->data; + if(now > cachedata->ttl || !rrsetdata_equal(updata, cachedata)) { + lock_rw_unlock(&e->lock); + return; /* expired, or rrset has changed in the meantime */ + } + if(cachedata->security > updata->security) { + updata->security = cachedata->security; + if(cachedata->security == sec_status_bogus) { + size_t i; + updata->ttl = cachedata->ttl - now; + for(i=0; i<cachedata->count+cachedata->rrsig_count; i++) + if(cachedata->rr_ttl[i] < now) + updata->rr_ttl[i] = 0; + else updata->rr_ttl[i] = + cachedata->rr_ttl[i]-now; + } + if(cachedata->trust > updata->trust) + updata->trust = cachedata->trust; + } + lock_rw_unlock(&e->lock); +} + +void rrset_cache_remove(struct rrset_cache* r, 
uint8_t* nm, size_t nmlen, + uint16_t type, uint16_t dclass, uint32_t flags) +{ + struct ub_packed_rrset_key key; + key.entry.key = &key; + key.rk.dname = nm; + key.rk.dname_len = nmlen; + key.rk.rrset_class = htons(dclass); + key.rk.type = htons(type); + key.rk.flags = flags; + key.entry.hash = rrset_key_hash(&key.rk); + slabhash_remove(&r->table, key.entry.hash, &key); +} diff --git a/external/unbound/services/cache/rrset.h b/external/unbound/services/cache/rrset.h new file mode 100644 index 000000000..98e44a4e5 --- /dev/null +++ b/external/unbound/services/cache/rrset.h @@ -0,0 +1,231 @@ +/* + * services/cache/rrset.h - Resource record set cache. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains the rrset cache. + */ + +#ifndef SERVICES_CACHE_RRSET_H +#define SERVICES_CACHE_RRSET_H +#include "util/storage/lruhash.h" +#include "util/storage/slabhash.h" +#include "util/data/packed_rrset.h" +struct config_file; +struct alloc_cache; +struct rrset_ref; +struct regional; + +/** + * The rrset cache + * Thin wrapper around hashtable, like a typedef. + */ +struct rrset_cache { + /** uses partitioned hash table */ + struct slabhash table; +}; + +/** + * Create rrset cache + * @param cfg: config settings or NULL for defaults. + * @param alloc: initial default rrset key allocation. + * @return: NULL on error. + */ +struct rrset_cache* rrset_cache_create(struct config_file* cfg, + struct alloc_cache* alloc); + +/** + * Delete rrset cache + * @param r: rrset cache to delete. + */ +void rrset_cache_delete(struct rrset_cache* r); + +/** + * Adjust settings of the cache to settings from the config file. + * May purge the cache. May recreate the cache. + * There may be no threading or use by other threads. + * @param r: rrset cache to adjust (like realloc). + * @param cfg: config settings or NULL for defaults. + * @param alloc: initial default rrset key allocation. + * @return 0 on error, or new rrset cache pointer on success. 
+ */ +struct rrset_cache* rrset_cache_adjust(struct rrset_cache* r, + struct config_file* cfg, struct alloc_cache* alloc); + +/** + * Touch rrset, with given pointer and id. + * Caller may not hold a lock on ANY rrset, this could give deadlock. + * + * This routine is faster than a hashtable lookup: + * o no bin_lock is acquired. + * o no walk through the bin-overflow-list. + * o no comparison of the entry key to find it. + * + * @param r: rrset cache. + * @param key: rrset key. Marked recently used (if it was not deleted + * before the lock is acquired, in that case nothing happens). + * @param hash: hash value of the item. Please read it from the key when + * you have it locked. Used to find slab from slabhash. + * @param id: used to check that the item is unchanged and not deleted. + */ +void rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key, + hashvalue_t hash, rrset_id_t id); + +/** + * Update an rrset in the rrset cache. Stores the information for later use. + * Will lookup if the rrset is in the cache and perform an update if necessary. + * If the item was present, and superior, references are returned to that. + * The passed item is then deallocated with rrset_parsedelete. + * + * A superior rrset is: + * o rrset with better trust value. + * o same trust value, different rdata, newly passed rrset is inserted. + * If rdata is the same, TTL in the cache is updated. + * + * @param r: the rrset cache. + * @param ref: reference (ptr and id) to the rrset. Pass reference setup for + * the new rrset. The reference may be changed if the cached rrset is + * superior. + * Before calling the rrset is presumed newly allocated and changeable. + * After calling you do not hold a lock, and the rrset is inserted in + * the hashtable so you need a lock to change it. + * @param alloc: how to allocate (and deallocate) the special rrset key. + * @param timenow: current time (to see if ttl in cache is expired). 
+ * @return: true if the passed reference is updated, false if it is unchanged. + * 0: reference unchanged, inserted in cache. + * 1: reference updated, item is inserted in cache. + * 2: reference updated, item in cache is considered superior. + * also the rdata is equal (but other parameters in cache are superior). + */ +int rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref, + struct alloc_cache* alloc, time_t timenow); + +/** + * Lookup rrset. You obtain read/write lock. You must unlock before looking up + * anything else. + * @param r: the rrset cache. + * @param qname: name of rrset to lookup. + * @param qnamelen: length of name of rrset to lookup. + * @param qtype: type of rrset to lookup (host order). + * @param qclass: class of rrset to lookup (host order). + * @param flags: rrset flags, or 0. + * @param timenow: used to compare with TTL. + * @param wr: set true to get writelock. + * @return packed rrset key pointer. Remember to unlock the key.entry.lock. + * or NULL if could not be found or it was timed out. + */ +struct ub_packed_rrset_key* rrset_cache_lookup(struct rrset_cache* r, + uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, + uint32_t flags, time_t timenow, int wr); + +/** + * Obtain readlock on a (sorted) list of rrset references. + * Checks TTLs and IDs of the rrsets and rollbacks locking if not Ok. + * @param ref: array of rrset references (key pointer and ID value). + * duplicate references are allowed and handled. + * @param count: size of array. + * @param timenow: used to compare with TTL. + * @return true on success, false on a failure, which can be that some + * RRsets have timed out, or that they do not exist any more, the + * RRsets have been purged from the cache. + * If true, you hold readlocks on all the ref items. + */ +int rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow); + +/** + * Unlock array (sorted) of rrset references. 
+ * @param ref: array of rrset references (key pointer and ID value). + * duplicate references are allowed and handled. + * @param count: size of array. + */ +void rrset_array_unlock(struct rrset_ref* ref, size_t count); + +/** + * Unlock array (sorted) of rrset references and at the same time + * touch LRU on the rrsets. It needs the scratch region for temporary + * storage as it uses the initial locks to obtain hash values. + * @param r: the rrset cache. In this cache LRU is updated. + * @param scratch: region for temporary storage of hash values. + * if memory allocation fails, the lru touch fails silently, + * but locks are released. memory errors are logged. + * @param ref: array of rrset references (key pointer and ID value). + * duplicate references are allowed and handled. + * @param count: size of array. + */ +void rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch, + struct rrset_ref* ref, size_t count); + +/** + * Update security status of an rrset. Looks up the rrset. + * If found, checks if rdata is equal. + * If so, it will update the security, trust and rrset-ttl values. + * The values are only updated if security is increased (towards secure). + * @param r: the rrset cache. + * @param rrset: which rrset to attempt to update. This rrset is left + * untouched. The rrset in the cache is updated in-place. + * @param now: current time. + */ +void rrset_update_sec_status(struct rrset_cache* r, + struct ub_packed_rrset_key* rrset, time_t now); + +/** + * Looks up security status of an rrset. Looks up the rrset. + * If found, checks if rdata is equal, and entry did not expire. + * If so, it will update the security, trust and rrset-ttl values. + * @param r: the rrset cache. + * @param rrset: This rrset may change security status due to the cache. + * But its status will only improve, towards secure. + * @param now: current time. 
+ */ +void rrset_check_sec_status(struct rrset_cache* r, + struct ub_packed_rrset_key* rrset, time_t now); + +/** + * Remove an rrset from the cache, by name and type and flags + * @param r: rrset cache + * @param nm: name of rrset + * @param nmlen: length of name + * @param type: type of rrset + * @param dclass: class of rrset, host order + * @param flags: flags of rrset, host order + */ +void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen, + uint16_t type, uint16_t dclass, uint32_t flags); + +/** mark rrset to be deleted, set id=0 */ +void rrset_markdel(void* key); + +#endif /* SERVICES_CACHE_RRSET_H */ diff --git a/external/unbound/services/listen_dnsport.c b/external/unbound/services/listen_dnsport.c new file mode 100644 index 000000000..b7ffb6d3f --- /dev/null +++ b/external/unbound/services/listen_dnsport.c @@ -0,0 +1,1062 @@ +/* + * services/listen_dnsport.c - listen on port 53 for incoming DNS queries. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file has functions to get queries from clients. + */ +#include "config.h" +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#include <sys/time.h> +#include "services/listen_dnsport.h" +#include "services/outside_network.h" +#include "util/netevent.h" +#include "util/log.h" +#include "util/config_file.h" +#include "util/net_help.h" +#include "ldns/sbuffer.h" + +#ifdef HAVE_NETDB_H +#include <netdb.h> +#endif +#include <fcntl.h> + +/** number of queued TCP connections for listen() */ +#define TCP_BACKLOG 256 + +/** + * Debug print of the getaddrinfo returned address. + * @param addr: the address returned. 
+ */ +static void +verbose_print_addr(struct addrinfo *addr) +{ + if(verbosity >= VERB_ALGO) { + char buf[100]; + void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr; +#ifdef INET6 + if(addr->ai_family == AF_INET6) + sinaddr = &((struct sockaddr_in6*)addr->ai_addr)-> + sin6_addr; +#endif /* INET6 */ + if(inet_ntop(addr->ai_family, sinaddr, buf, + (socklen_t)sizeof(buf)) == 0) { + (void)strlcpy(buf, "(null)", sizeof(buf)); + } + buf[sizeof(buf)-1] = 0; + verbose(VERB_ALGO, "creating %s%s socket %s %d", + addr->ai_socktype==SOCK_DGRAM?"udp": + addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", + addr->ai_family==AF_INET?"4": + addr->ai_family==AF_INET6?"6": + "_otherfam", buf, + ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port)); + } +} + +int +create_udp_sock(int family, int socktype, struct sockaddr* addr, + socklen_t addrlen, int v6only, int* inuse, int* noproto, + int rcv, int snd, int listen, int* reuseport) +{ + int s; +#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) + int on=1; +#endif +#ifdef IPV6_MTU + int mtu = IPV6_MIN_MTU; +#endif +#if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF) + (void)rcv; +#endif +#if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF) + (void)snd; +#endif +#ifndef IPV6_V6ONLY + (void)v6only; +#endif + if((s = socket(family, socktype, 0)) == -1) { + *inuse = 0; +#ifndef USE_WINSOCK + if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { + *noproto = 1; + return -1; + } + log_err("can't create socket: %s", strerror(errno)); +#else + if(WSAGetLastError() == WSAEAFNOSUPPORT || + WSAGetLastError() == WSAEPROTONOSUPPORT) { + *noproto = 1; + return -1; + } + log_err("can't create socket: %s", + wsa_strerror(WSAGetLastError())); +#endif + *noproto = 0; + return -1; + } + if(listen) { +#ifdef SO_REUSEADDR + if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, + (socklen_t)sizeof(on)) < 0) { +#ifndef USE_WINSOCK + log_err("setsockopt(.. SO_REUSEADDR ..) 
failed: %s", + strerror(errno)); + if(errno != ENOSYS) { + close(s); + *noproto = 0; + *inuse = 0; + return -1; + } +#else + log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); + *noproto = 0; + *inuse = 0; + return -1; +#endif + } +#endif /* SO_REUSEADDR */ +#ifdef SO_REUSEPORT + /* try to set SO_REUSEPORT so that incoming + * queries are distributed evenly among the receiving threads. + * Each thread must have its own socket bound to the same port, + * with SO_REUSEPORT set on each socket. + */ + if (reuseport && *reuseport && + setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, + (socklen_t)sizeof(on)) < 0) { +#ifdef ENOPROTOOPT + if(errno != ENOPROTOOPT || verbosity >= 3) + log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", + strerror(errno)); +#endif + /* this option is not essential, we can continue */ + *reuseport = 0; + } +#else + (void)reuseport; +#endif /* defined(SO_REUSEPORT) */ + } + if(rcv) { +#ifdef SO_RCVBUF + int got; + socklen_t slen = (socklen_t)sizeof(got); +# ifdef SO_RCVBUFFORCE + /* Linux specific: try to use root permission to override + * system limits on rcvbuf. The limit is stored in + * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */ + if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, + (socklen_t)sizeof(rcv)) < 0) { + if(errno != EPERM) { +# ifndef USE_WINSOCK + log_err("setsockopt(..., SO_RCVBUFFORCE, " + "...) failed: %s", strerror(errno)); + close(s); +# else + log_err("setsockopt(..., SO_RCVBUFFORCE, " + "...) failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +# endif + *noproto = 0; + *inuse = 0; + return -1; + } +# endif /* SO_RCVBUFFORCE */ + if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, + (socklen_t)sizeof(rcv)) < 0) { +# ifndef USE_WINSOCK + log_err("setsockopt(..., SO_RCVBUF, " + "...) failed: %s", strerror(errno)); + close(s); +# else + log_err("setsockopt(..., SO_RCVBUF, " + "...) 
failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +# endif + *noproto = 0; + *inuse = 0; + return -1; + } + /* check if we got the right thing or if system + * reduced to some system max. Warn if so */ + if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, + &slen) >= 0 && got < rcv/2) { + log_warn("so-rcvbuf %u was not granted. " + "Got %u. To fix: start with " + "root permissions(linux) or sysctl " + "bigger net.core.rmem_max(linux) or " + "kern.ipc.maxsockbuf(bsd) values.", + (unsigned)rcv, (unsigned)got); + } +# ifdef SO_RCVBUFFORCE + } +# endif +#endif /* SO_RCVBUF */ + } + /* first do RCVBUF as the receive buffer is more important */ + if(snd) { +#ifdef SO_SNDBUF + int got; + socklen_t slen = (socklen_t)sizeof(got); +# ifdef SO_SNDBUFFORCE + /* Linux specific: try to use root permission to override + * system limits on sndbuf. The limit is stored in + * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */ + if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, + (socklen_t)sizeof(snd)) < 0) { + if(errno != EPERM) { +# ifndef USE_WINSOCK + log_err("setsockopt(..., SO_SNDBUFFORCE, " + "...) failed: %s", strerror(errno)); + close(s); +# else + log_err("setsockopt(..., SO_SNDBUFFORCE, " + "...) failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +# endif + *noproto = 0; + *inuse = 0; + return -1; + } +# endif /* SO_SNDBUFFORCE */ + if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, + (socklen_t)sizeof(snd)) < 0) { +# ifndef USE_WINSOCK + log_err("setsockopt(..., SO_SNDBUF, " + "...) failed: %s", strerror(errno)); + close(s); +# else + log_err("setsockopt(..., SO_SNDBUF, " + "...) failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +# endif + *noproto = 0; + *inuse = 0; + return -1; + } + /* check if we got the right thing or if system + * reduced to some system max. 
Warn if so */ + if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, + &slen) >= 0 && got < snd/2) { + log_warn("so-sndbuf %u was not granted. " + "Got %u. To fix: start with " + "root permissions(linux) or sysctl " + "bigger net.core.wmem_max(linux) or " + "kern.ipc.maxsockbuf(bsd) values.", + (unsigned)snd, (unsigned)got); + } +# ifdef SO_SNDBUFFORCE + } +# endif +#endif /* SO_SNDBUF */ + } + if(family == AF_INET6) { +# if defined(IPV6_V6ONLY) + if(v6only) { + int val=(v6only==2)?0:1; + if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, + (void*)&val, (socklen_t)sizeof(val)) < 0) { +#ifndef USE_WINSOCK + log_err("setsockopt(..., IPV6_V6ONLY" + ", ...) failed: %s", strerror(errno)); + close(s); +#else + log_err("setsockopt(..., IPV6_V6ONLY" + ", ...) failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +#endif + *noproto = 0; + *inuse = 0; + return -1; + } + } +# endif +# if defined(IPV6_USE_MIN_MTU) + /* + * There is no fragmentation of IPv6 datagrams + * during forwarding in the network. Therefore + * we do not send UDP datagrams larger than + * the minimum IPv6 MTU of 1280 octets. The + * EDNS0 message length can be larger if the + * network stack supports IPV6_USE_MIN_MTU. + */ + if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU, + (void*)&on, (socklen_t)sizeof(on)) < 0) { +# ifndef USE_WINSOCK + log_err("setsockopt(..., IPV6_USE_MIN_MTU, " + "...) failed: %s", strerror(errno)); + close(s); +# else + log_err("setsockopt(..., IPV6_USE_MIN_MTU, " + "...) failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +# endif + *noproto = 0; + *inuse = 0; + return -1; + } +# elif defined(IPV6_MTU) + /* + * On Linux, to send no larger than 1280, the PMTUD is + * disabled by default for datagrams anyway, so we set + * the MTU to use. + */ + if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU, + (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { +# ifndef USE_WINSOCK + log_err("setsockopt(..., IPV6_MTU, ...) 
failed: %s", + strerror(errno)); + close(s); +# else + log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +# endif + *noproto = 0; + *inuse = 0; + return -1; + } +# endif /* IPv6 MTU */ + } else if(family == AF_INET) { +# if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) +/* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that + * PMTU information is not accepted, but fragmentation is allowed + * if and only if the packet size exceeds the outgoing interface MTU + * (and also uses the interface mtu to determine the size of the packets). + * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks. + * FreeBSD already has same semantics without setting the option. */ +# if defined(IP_PMTUDISC_OMIT) + int action = IP_PMTUDISC_OMIT; +# else + int action = IP_PMTUDISC_DONT; +# endif + if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, + &action, (socklen_t)sizeof(action)) < 0) { + log_err("setsockopt(..., IP_MTU_DISCOVER, " +# if defined(IP_PMTUDISC_OMIT) + "IP_PMTUDISC_OMIT" +# else + "IP_PMTUDISC_DONT" +# endif + "...) failed: %s", + strerror(errno)); +# ifndef USE_WINSOCK + close(s); +# else + closesocket(s); +# endif + *noproto = 0; + *inuse = 0; + return -1; + } +# elif defined(IP_DONTFRAG) + int off = 0; + if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, + &off, (socklen_t)sizeof(off)) < 0) { + log_err("setsockopt(..., IP_DONTFRAG, ...) 
failed: %s", + strerror(errno)); +# ifndef USE_WINSOCK + close(s); +# else + closesocket(s); +# endif + *noproto = 0; + *inuse = 0; + return -1; + } +# endif /* IPv4 MTU */ + } + if(bind(s, (struct sockaddr*)addr, addrlen) != 0) { + *noproto = 0; + *inuse = 0; +#ifndef USE_WINSOCK +#ifdef EADDRINUSE + *inuse = (errno == EADDRINUSE); + /* detect freebsd jail with no ipv6 permission */ + if(family==AF_INET6 && errno==EINVAL) + *noproto = 1; + else if(errno != EADDRINUSE) { + log_err_addr("can't bind socket", strerror(errno), + (struct sockaddr_storage*)addr, addrlen); + } +#endif /* EADDRINUSE */ + close(s); +#else /* USE_WINSOCK */ + if(WSAGetLastError() != WSAEADDRINUSE && + WSAGetLastError() != WSAEADDRNOTAVAIL) { + log_err_addr("can't bind socket", + wsa_strerror(WSAGetLastError()), + (struct sockaddr_storage*)addr, addrlen); + } + closesocket(s); +#endif + return -1; + } + if(!fd_set_nonblock(s)) { + *noproto = 0; + *inuse = 0; +#ifndef USE_WINSOCK + close(s); +#else + closesocket(s); +#endif + return -1; + } + return s; +} + +int +create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, + int* reuseport) +{ + int s; +#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) + int on = 1; +#endif /* SO_REUSEADDR || IPV6_V6ONLY */ + verbose_print_addr(addr); + *noproto = 0; + if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { +#ifndef USE_WINSOCK + if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { + *noproto = 1; + return -1; + } + log_err("can't create socket: %s", strerror(errno)); +#else + if(WSAGetLastError() == WSAEAFNOSUPPORT || + WSAGetLastError() == WSAEPROTONOSUPPORT) { + *noproto = 1; + return -1; + } + log_err("can't create socket: %s", + wsa_strerror(WSAGetLastError())); +#endif + return -1; + } +#ifdef SO_REUSEADDR + if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, + (socklen_t)sizeof(on)) < 0) { +#ifndef USE_WINSOCK + log_err("setsockopt(.. SO_REUSEADDR ..) 
failed: %s", + strerror(errno)); + close(s); +#else + log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +#endif + return -1; + } +#endif /* SO_REUSEADDR */ +#ifdef SO_REUSEPORT + /* try to set SO_REUSEPORT so that incoming + * connections are distributed evenly among the receiving threads. + * Each thread must have its own socket bound to the same port, + * with SO_REUSEPORT set on each socket. + */ + if (reuseport && *reuseport && + setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, + (socklen_t)sizeof(on)) < 0) { +#ifdef ENOPROTOOPT + if(errno != ENOPROTOOPT || verbosity >= 3) + log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", + strerror(errno)); +#endif + /* this option is not essential, we can continue */ + *reuseport = 0; + } +#else + (void)reuseport; +#endif /* defined(SO_REUSEPORT) */ +#if defined(IPV6_V6ONLY) + if(addr->ai_family == AF_INET6 && v6only) { + if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, + (void*)&on, (socklen_t)sizeof(on)) < 0) { +#ifndef USE_WINSOCK + log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s", + strerror(errno)); + close(s); +#else + log_err("setsockopt(..., IPV6_V6ONLY, ...) 
failed: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +#endif + return -1; + } + } +#else + (void)v6only; +#endif /* IPV6_V6ONLY */ + if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) { +#ifndef USE_WINSOCK + /* detect freebsd jail with no ipv6 permission */ + if(addr->ai_family==AF_INET6 && errno==EINVAL) + *noproto = 1; + else { + log_err_addr("can't bind socket", strerror(errno), + (struct sockaddr_storage*)addr->ai_addr, + addr->ai_addrlen); + } + close(s); +#else + log_err_addr("can't bind socket", + wsa_strerror(WSAGetLastError()), + (struct sockaddr_storage*)addr->ai_addr, + addr->ai_addrlen); + closesocket(s); +#endif + return -1; + } + if(!fd_set_nonblock(s)) { +#ifndef USE_WINSOCK + close(s); +#else + closesocket(s); +#endif + return -1; + } + if(listen(s, TCP_BACKLOG) == -1) { +#ifndef USE_WINSOCK + log_err("can't listen: %s", strerror(errno)); + close(s); +#else + log_err("can't listen: %s", wsa_strerror(WSAGetLastError())); + closesocket(s); +#endif + return -1; + } + return s; +} + +/** + * Create socket from getaddrinfo results + */ +static int +make_sock(int stype, const char* ifname, const char* port, + struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, + int* reuseport) +{ + struct addrinfo *res = NULL; + int r, s, inuse, noproto; + hints->ai_socktype = stype; + *noip6 = 0; + if((r=getaddrinfo(ifname, port, hints, &res)) != 0 || !res) { +#ifdef USE_WINSOCK + if(r == EAI_NONAME && hints->ai_family == AF_INET6){ + *noip6 = 1; /* 'Host not found' for IP6 on winXP */ + return -1; + } +#endif + log_err("node %s:%s getaddrinfo: %s %s", + ifname?ifname:"default", port, gai_strerror(r), +#ifdef EAI_SYSTEM + r==EAI_SYSTEM?(char*)strerror(errno):"" +#else + "" +#endif + ); + return -1; + } + if(stype == SOCK_DGRAM) { + verbose_print_addr(res); + s = create_udp_sock(res->ai_family, res->ai_socktype, + (struct sockaddr*)res->ai_addr, res->ai_addrlen, + v6only, &inuse, &noproto, (int)rcv, (int)snd, 1, + reuseport); + if(s == 
-1 && inuse) { + log_err("bind: address already in use"); + } else if(s == -1 && noproto && hints->ai_family == AF_INET6){ + *noip6 = 1; + } + } else { + s = create_tcp_accept_sock(res, v6only, &noproto, reuseport); + if(s == -1 && noproto && hints->ai_family == AF_INET6){ + *noip6 = 1; + } + } + freeaddrinfo(res); + return s; +} + +/** make socket and first see if ifname contains port override info */ +static int +make_sock_port(int stype, const char* ifname, const char* port, + struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, + int* reuseport) +{ + char* s = strchr(ifname, '@'); + if(s) { + /* override port with ifspec@port */ + char p[16]; + char newif[128]; + if((size_t)(s-ifname) >= sizeof(newif)) { + log_err("ifname too long: %s", ifname); + *noip6 = 0; + return -1; + } + if(strlen(s+1) >= sizeof(p)) { + log_err("portnumber too long: %s", ifname); + *noip6 = 0; + return -1; + } + (void)strlcpy(newif, ifname, sizeof(newif)); + newif[s-ifname] = 0; + (void)strlcpy(p, s+1, sizeof(p)); + p[strlen(s+1)]=0; + return make_sock(stype, newif, p, hints, v6only, noip6, + rcv, snd, reuseport); + } + return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd, + reuseport); +} + +/** + * Add port to open ports list. + * @param list: list head. changed. + * @param s: fd. + * @param ftype: listen type of the fd (udp, tcp, udpancil or ssl). + * @return false on failure. list is unchanged then. 
+ */ +static int +port_insert(struct listen_port** list, int s, enum listen_type ftype) +{ + struct listen_port* item = (struct listen_port*)malloc( + sizeof(struct listen_port)); + if(!item) + return 0; + item->next = *list; + item->fd = s; + item->ftype = ftype; + *list = item; + return 1; +} + +/** set fd to receive source address packet info */ +static int +set_recvpktinfo(int s, int family) +{ +#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO) + int on = 1; +#else + (void)s; +#endif + if(family == AF_INET6) { +# ifdef IPV6_RECVPKTINFO + if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, + (void*)&on, (socklen_t)sizeof(on)) < 0) { + log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s", + strerror(errno)); + return 0; + } +# elif defined(IPV6_PKTINFO) + if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO, + (void*)&on, (socklen_t)sizeof(on)) < 0) { + log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s", + strerror(errno)); + return 0; + } +# else + log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please " + "disable interface-automatic in config"); + return 0; +# endif /* defined IPV6_RECVPKTINFO */ + + } else if(family == AF_INET) { +# ifdef IP_PKTINFO + if(setsockopt(s, IPPROTO_IP, IP_PKTINFO, + (void*)&on, (socklen_t)sizeof(on)) < 0) { + log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s", + strerror(errno)); + return 0; + } +# elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR) + if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR, + (void*)&on, (socklen_t)sizeof(on)) < 0) { + log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s", + strerror(errno)); + return 0; + } +# else + log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable " + "interface-automatic in config"); + return 0; +# endif /* IP_PKTINFO */ + + } + return 1; +} + +/** + * Helper for ports_open. Creates one interface (or NULL for default). + * @param ifname: The interface ip address. 
+ * @param do_auto: use automatic interface detection. + * If enabled, then ifname must be the wildcard name. + * @param do_udp: if udp should be used. + * @param do_tcp: if tcp should be used. + * @param hints: for getaddrinfo. family and flags have to be set by caller. + * @param port: Port number to use (as string). + * @param list: list of open ports, appended to, changed to point to list head. + * @param rcv: receive buffer size for UDP + * @param snd: send buffer size for UDP + * @param ssl_port: ssl service port number + * @param reuseport: try to set SO_REUSEPORT if nonNULL and true. + * set to false on exit if reuseport failed due to no kernel support. + * @return: returns false on error. + */ +static int +ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, + struct addrinfo *hints, const char* port, struct listen_port** list, + size_t rcv, size_t snd, int ssl_port, int* reuseport) +{ + int s, noip6=0; + if(!do_udp && !do_tcp) + return 0; + if(do_auto) { + if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, + &noip6, rcv, snd, reuseport)) == -1) { + if(noip6) { + log_warn("IPv6 protocol not available"); + return 1; + } + return 0; + } + /* getting source addr packet info is highly non-portable */ + if(!set_recvpktinfo(s, hints->ai_family)) { +#ifndef USE_WINSOCK + close(s); +#else + closesocket(s); +#endif + return 0; + } + if(!port_insert(list, s, listen_type_udpancil)) { +#ifndef USE_WINSOCK + close(s); +#else + closesocket(s); +#endif + return 0; + } + } else if(do_udp) { + /* regular udp socket */ + if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, + &noip6, rcv, snd, reuseport)) == -1) { + if(noip6) { + log_warn("IPv6 protocol not available"); + return 1; + } + return 0; + } + if(!port_insert(list, s, listen_type_udp)) { +#ifndef USE_WINSOCK + close(s); +#else + closesocket(s); +#endif + return 0; + } + } + if(do_tcp) { + int is_ssl = ((strchr(ifname, '@') && + atoi(strchr(ifname, '@')+1) == ssl_port) || + 
(!strchr(ifname, '@') && atoi(port) == ssl_port)); + if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, + &noip6, 0, 0, reuseport)) == -1) { + if(noip6) { + /*log_warn("IPv6 protocol not available");*/ + return 1; + } + return 0; + } + if(is_ssl) + verbose(VERB_ALGO, "setup TCP for SSL service"); + if(!port_insert(list, s, is_ssl?listen_type_ssl: + listen_type_tcp)) { +#ifndef USE_WINSOCK + close(s); +#else + closesocket(s); +#endif + return 0; + } + } + return 1; +} + +/** + * Add items to commpoint list in front. + * @param c: commpoint to add. + * @param front: listen struct. + * @return: false on failure. + */ +static int +listen_cp_insert(struct comm_point* c, struct listen_dnsport* front) +{ + struct listen_list* item = (struct listen_list*)malloc( + sizeof(struct listen_list)); + if(!item) + return 0; + item->com = c; + item->next = front->cps; + front->cps = item; + return 1; +} + +struct listen_dnsport* +listen_create(struct comm_base* base, struct listen_port* ports, + size_t bufsize, int tcp_accept_count, void* sslctx, + struct dt_env* dtenv, comm_point_callback_t* cb, void *cb_arg) +{ + struct listen_dnsport* front = (struct listen_dnsport*) + malloc(sizeof(struct listen_dnsport)); + if(!front) + return NULL; + front->cps = NULL; + front->udp_buff = sldns_buffer_new(bufsize); + if(!front->udp_buff) { + free(front); + return NULL; + } + + /* create comm points as needed */ + while(ports) { + struct comm_point* cp = NULL; + if(ports->ftype == listen_type_udp) + cp = comm_point_create_udp(base, ports->fd, + front->udp_buff, cb, cb_arg); + else if(ports->ftype == listen_type_tcp) + cp = comm_point_create_tcp(base, ports->fd, + tcp_accept_count, bufsize, cb, cb_arg); + else if(ports->ftype == listen_type_ssl) { + cp = comm_point_create_tcp(base, ports->fd, + tcp_accept_count, bufsize, cb, cb_arg); + cp->ssl = sslctx; + } else if(ports->ftype == listen_type_udpancil) + cp = comm_point_create_udp_ancil(base, ports->fd, + front->udp_buff, cb, cb_arg); 
+ if(!cp) { + log_err("can't create commpoint"); + listen_delete(front); + return NULL; + } + cp->dtenv = dtenv; + cp->do_not_close = 1; + if(!listen_cp_insert(cp, front)) { + log_err("malloc failed"); + comm_point_delete(cp); + listen_delete(front); + return NULL; + } + ports = ports->next; + } + if(!front->cps) { + log_err("Could not open sockets to accept queries."); + listen_delete(front); + return NULL; + } + + return front; +} + +void +listen_list_delete(struct listen_list* list) +{ + struct listen_list *p = list, *pn; + while(p) { + pn = p->next; + comm_point_delete(p->com); + free(p); + p = pn; + } +} + +void +listen_delete(struct listen_dnsport* front) +{ + if(!front) + return; + listen_list_delete(front->cps); + sldns_buffer_free(front->udp_buff); + free(front); +} + +struct listen_port* +listening_ports_open(struct config_file* cfg, int* reuseport) +{ + struct listen_port* list = NULL; + struct addrinfo hints; + int i, do_ip4, do_ip6; + int do_tcp, do_auto; + char portbuf[32]; + snprintf(portbuf, sizeof(portbuf), "%d", cfg->port); + do_ip4 = cfg->do_ip4; + do_ip6 = cfg->do_ip6; + do_tcp = cfg->do_tcp; + do_auto = cfg->if_automatic && cfg->do_udp; + if(cfg->incoming_num_tcp == 0) + do_tcp = 0; + + /* getaddrinfo */ + memset(&hints, 0, sizeof(hints)); + hints.ai_flags = AI_PASSIVE; + /* no name lookups on our listening ports */ + if(cfg->num_ifs > 0) + hints.ai_flags |= AI_NUMERICHOST; + hints.ai_family = AF_UNSPEC; +#ifndef INET6 + do_ip6 = 0; +#endif + if(!do_ip4 && !do_ip6) { + return NULL; + } + /* create ip4 and ip6 ports so that return addresses are nice. 
*/ + if(do_auto || cfg->num_ifs == 0) { + if(do_ip6) { + hints.ai_family = AF_INET6; + if(!ports_create_if(do_auto?"::0":"::1", + do_auto, cfg->do_udp, do_tcp, + &hints, portbuf, &list, + cfg->so_rcvbuf, cfg->so_sndbuf, + cfg->ssl_port, reuseport)) { + listening_ports_free(list); + return NULL; + } + } + if(do_ip4) { + hints.ai_family = AF_INET; + if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", + do_auto, cfg->do_udp, do_tcp, + &hints, portbuf, &list, + cfg->so_rcvbuf, cfg->so_sndbuf, + cfg->ssl_port, reuseport)) { + listening_ports_free(list); + return NULL; + } + } + } else for(i = 0; i<cfg->num_ifs; i++) { + if(str_is_ip6(cfg->ifs[i])) { + if(!do_ip6) + continue; + hints.ai_family = AF_INET6; + if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, + do_tcp, &hints, portbuf, &list, + cfg->so_rcvbuf, cfg->so_sndbuf, + cfg->ssl_port, reuseport)) { + listening_ports_free(list); + return NULL; + } + } else { + if(!do_ip4) + continue; + hints.ai_family = AF_INET; + if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, + do_tcp, &hints, portbuf, &list, + cfg->so_rcvbuf, cfg->so_sndbuf, + cfg->ssl_port, reuseport)) { + listening_ports_free(list); + return NULL; + } + } + } + return list; +} + +void listening_ports_free(struct listen_port* list) +{ + struct listen_port* nx; + while(list) { + nx = list->next; + if(list->fd != -1) { +#ifndef USE_WINSOCK + close(list->fd); +#else + closesocket(list->fd); +#endif + } + free(list); + list = nx; + } +} + +size_t listen_get_mem(struct listen_dnsport* listen) +{ + size_t s = sizeof(*listen) + sizeof(*listen->base) + + sizeof(*listen->udp_buff) + + sldns_buffer_capacity(listen->udp_buff); + struct listen_list* p; + for(p = listen->cps; p; p = p->next) { + s += sizeof(*p); + s += comm_point_get_mem(p->com); + } + return s; +} + +void listen_stop_accept(struct listen_dnsport* listen) +{ + /* do not stop the ones that have no tcp_free list + * (they have already stopped listening) */ + struct listen_list* p; + for(p=listen->cps; p; 
p=p->next) { + if(p->com->type == comm_tcp_accept && + p->com->tcp_free != NULL) { + comm_point_stop_listening(p->com); + } + } +} + +void listen_start_accept(struct listen_dnsport* listen) +{ + /* do not start the ones that have no tcp_free list, it is no + * use to listen to them because they have no free tcp handlers */ + struct listen_list* p; + for(p=listen->cps; p; p=p->next) { + if(p->com->type == comm_tcp_accept && + p->com->tcp_free != NULL) { + comm_point_start_listening(p->com, -1, -1); + } + } +} + diff --git a/external/unbound/services/listen_dnsport.h b/external/unbound/services/listen_dnsport.h new file mode 100644 index 000000000..075f6d281 --- /dev/null +++ b/external/unbound/services/listen_dnsport.h @@ -0,0 +1,210 @@ +/* + * services/listen_dnsport.h - listen on port 53 for incoming DNS queries. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file has functions to get queries from clients. + */ + +#ifndef LISTEN_DNSPORT_H +#define LISTEN_DNSPORT_H + +#include "util/netevent.h" +struct listen_list; +struct config_file; +struct addrinfo; +struct sldns_buffer; + +/** + * Listening for queries structure. + * Contains list of query-listen sockets. + */ +struct listen_dnsport { + /** Base for select calls */ + struct comm_base* base; + + /** buffer shared by UDP connections, since there is only one + datagram at any time. */ + struct sldns_buffer* udp_buff; + + /** list of comm points used to get incoming events */ + struct listen_list* cps; +}; + +/** + * Single linked list to store event points. + */ +struct listen_list { + /** next in list */ + struct listen_list* next; + /** event info */ + struct comm_point* com; +}; + +/** + * type of ports + */ +enum listen_type { + /** udp type */ + listen_type_udp, + /** tcp type */ + listen_type_tcp, + /** udp ipv6 (v4mapped) for use with ancillary data */ + listen_type_udpancil, + /** ssl over tcp type */ + listen_type_ssl +}; + +/** + * Single linked list to store shared ports that have been + * opened for use by all threads. 
+ */ +struct listen_port { + /** next in list */ + struct listen_port* next; + /** file descriptor, open and ready for use */ + int fd; + /** type of file descriptor, udp or tcp */ + enum listen_type ftype; +}; + +/** + * Create shared listening ports + * Getaddrinfo, create socket, bind and listen to zero or more + * interfaces for IP4 and/or IP6, for UDP and/or TCP. + * On the given port number. It creates the sockets. + * @param cfg: settings on what ports to open. + * @param reuseport: set to true if you want reuseport, or NULL to not have it, + * set to false on exit if reuseport failed to apply (because of no + * kernel support). + * @return: linked list of ports or NULL on error. + */ +struct listen_port* listening_ports_open(struct config_file* cfg, + int* reuseport); + +/** + * Close and delete the (list of) listening ports. + */ +void listening_ports_free(struct listen_port* list); + +/** + * Create commpoints for this thread for the shared ports. + * @param base: the comm_base that provides event functionality. + * for default all ifs. + * @param ports: the list of shared ports. + * @param bufsize: size of datagram buffer. + * @param tcp_accept_count: max number of simultaneous TCP connections + * from clients. + * @param sslctx: nonNULL if ssl context. + * @param dtenv: nonNULL if dnstap enabled. + * @param cb: callback function when a request arrives. It is passed + * the packet and user argument. Return true to send a reply. + * @param cb_arg: user data argument for callback function. + * @return: the malloced listening structure, ready for use. NULL on error. + */ +struct listen_dnsport* listen_create(struct comm_base* base, + struct listen_port* ports, size_t bufsize, int tcp_accept_count, + void* sslctx, struct dt_env *dtenv, comm_point_callback_t* cb, + void* cb_arg); + +/** + * delete the listening structure + * @param listen: listening structure. 
+ */ +void listen_delete(struct listen_dnsport* listen); + +/** + * delete listen_list of commpoints. Calls commpointdelete() on items. + * This may close the fds or not depending on flags. + * @param list: to delete. + */ +void listen_list_delete(struct listen_list* list); + +/** + * get memory size used by the listening structs + * @param listen: listening structure. + * @return: size in bytes. + */ +size_t listen_get_mem(struct listen_dnsport* listen); + +/** + * stop accept handlers for TCP (until enabled again) + * @param listen: listening structure. + */ +void listen_stop_accept(struct listen_dnsport* listen); + +/** + * start accept handlers for TCP (was stopped before) + * @param listen: listening structure. + */ +void listen_start_accept(struct listen_dnsport* listen); + +/** + * Create and bind nonblocking UDP socket + * @param family: for socket call. + * @param socktype: for socket call. + * @param addr: for bind call. + * @param addrlen: for bind call. + * @param v6only: if enabled, IP6 sockets get IP6ONLY option set. + * if enabled with value 2 IP6ONLY option is disabled. + * @param inuse: on error, this is set true if the port was in use. + * @param noproto: on error, this is set true if cause is that the + IPv6 proto (family) is not available. + * @param rcv: set size on rcvbuf with socket option, if 0 it is not set. + * @param snd: set size on sndbuf with socket option, if 0 it is not set. + * @param listen: if true, this is a listening UDP port, eg port 53, and + * set SO_REUSEADDR on it. + * @param reuseport: if nonNULL and true, try to set SO_REUSEPORT on + * listening UDP port. Set to false on return if it failed to do so. + * @return: the socket. -1 on error. + */ +int create_udp_sock(int family, int socktype, struct sockaddr* addr, + socklen_t addrlen, int v6only, int* inuse, int* noproto, int rcv, + int snd, int listen, int* reuseport); + +/** + * Create and bind TCP listening socket + * @param addr: address info ready to make socket. 
+ * @param v6only: enable ip6 only flag on ip6 sockets. + * @param noproto: if error caused by lack of protocol support. + * @param reuseport: if nonNULL and true, try to set SO_REUSEPORT on + * listening TCP port. Set to false on return if it failed to do so. + * @return: the socket. -1 on error. + */ +int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, + int* reuseport); + +#endif /* LISTEN_DNSPORT_H */ diff --git a/external/unbound/services/localzone.c b/external/unbound/services/localzone.c new file mode 100644 index 000000000..d285a127c --- /dev/null +++ b/external/unbound/services/localzone.c @@ -0,0 +1,1400 @@ +/* + * services/localzone.c - local zones authority service. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains functions to enable local zone authority service. + */ +#include "config.h" +#include "services/localzone.h" +#include "ldns/str2wire.h" +#include "ldns/sbuffer.h" +#include "util/regional.h" +#include "util/config_file.h" +#include "util/data/dname.h" +#include "util/data/packed_rrset.h" +#include "util/data/msgencode.h" +#include "util/net_help.h" +#include "util/data/msgreply.h" +#include "util/data/msgparse.h" + +struct local_zones* +local_zones_create(void) +{ + struct local_zones* zones = (struct local_zones*)calloc(1, + sizeof(*zones)); + if(!zones) + return NULL; + rbtree_init(&zones->ztree, &local_zone_cmp); + lock_rw_init(&zones->lock); + lock_protect(&zones->lock, &zones->ztree, sizeof(zones->ztree)); + /* also lock protects the rbnode's in struct local_zone */ + return zones; +} + +/** helper traverse to delete zones */ +static void +lzdel(rbnode_t* n, void* ATTR_UNUSED(arg)) +{ + struct local_zone* z = (struct local_zone*)n->key; + local_zone_delete(z); +} + +void +local_zones_delete(struct local_zones* zones) +{ + if(!zones) + return; + lock_rw_destroy(&zones->lock); + /* walk through zones and delete them all */ + traverse_postorder(&zones->ztree, lzdel, NULL); + free(zones); +} + +void +local_zone_delete(struct local_zone* z) +{ + if(!z) + return; + lock_rw_destroy(&z->lock); + regional_destroy(z->region); + free(z->name); + free(z); +} + +int +local_zone_cmp(const 
void* z1, const void* z2) +{ + /* first sort on class, so that hierarchy can be maintained within + * a class */ + struct local_zone* a = (struct local_zone*)z1; + struct local_zone* b = (struct local_zone*)z2; + int m; + if(a->dclass != b->dclass) { + if(a->dclass < b->dclass) + return -1; + return 1; + } + return dname_lab_cmp(a->name, a->namelabs, b->name, b->namelabs, &m); +} + +int +local_data_cmp(const void* d1, const void* d2) +{ + struct local_data* a = (struct local_data*)d1; + struct local_data* b = (struct local_data*)d2; + int m; + return dname_canon_lab_cmp(a->name, a->namelabs, b->name, + b->namelabs, &m); +} + +/* form wireformat from text format domain name */ +int +parse_dname(const char* str, uint8_t** res, size_t* len, int* labs) +{ + *res = sldns_str2wire_dname(str, len); + *labs = 0; + if(!*res) { + log_err("cannot parse name %s", str); + return 0; + } + *labs = dname_count_size_labels(*res, len); + return 1; +} + +/** create a new localzone */ +static struct local_zone* +local_zone_create(uint8_t* nm, size_t len, int labs, + enum localzone_type t, uint16_t dclass) +{ + struct local_zone* z = (struct local_zone*)calloc(1, sizeof(*z)); + if(!z) { + return NULL; + } + z->node.key = z; + z->dclass = dclass; + z->type = t; + z->name = nm; + z->namelen = len; + z->namelabs = labs; + lock_rw_init(&z->lock); + z->region = regional_create(); + if(!z->region) { + free(z); + return NULL; + } + rbtree_init(&z->data, &local_data_cmp); + lock_protect(&z->lock, &z->parent, sizeof(*z)-sizeof(rbnode_t)); + /* also the zones->lock protects node, parent, name*, class */ + return z; +} + +/** enter a new zone with allocated dname returns with WRlock */ +static struct local_zone* +lz_enter_zone_dname(struct local_zones* zones, uint8_t* nm, size_t len, + int labs, enum localzone_type t, uint16_t c) +{ + struct local_zone* z = local_zone_create(nm, len, labs, t, c); + if(!z) { + log_err("out of memory"); + return NULL; + } + + /* add to rbtree */ + 
lock_rw_wrlock(&zones->lock); + lock_rw_wrlock(&z->lock); + if(!rbtree_insert(&zones->ztree, &z->node)) { + log_warn("duplicate local-zone"); + lock_rw_unlock(&z->lock); + local_zone_delete(z); + lock_rw_unlock(&zones->lock); + return NULL; + } + lock_rw_unlock(&zones->lock); + return z; +} + +/** enter a new zone */ +static struct local_zone* +lz_enter_zone(struct local_zones* zones, const char* name, const char* type, + uint16_t dclass) +{ + struct local_zone* z; + enum localzone_type t; + uint8_t* nm; + size_t len; + int labs; + if(!parse_dname(name, &nm, &len, &labs)) { + log_err("bad zone name %s %s", name, type); + return NULL; + } + if(!local_zone_str2type(type, &t)) { + log_err("bad lz_enter_zone type %s %s", name, type); + free(nm); + return NULL; + } + if(!(z=lz_enter_zone_dname(zones, nm, len, labs, t, dclass))) { + log_err("could not enter zone %s %s", name, type); + return NULL; + } + return z; +} + +/** return name and class and rdata of rr; parses string */ +static int +get_rr_content(const char* str, uint8_t** nm, uint16_t* type, + uint16_t* dclass, time_t* ttl, uint8_t* rr, size_t len, + uint8_t** rdata, size_t* rdata_len) +{ + size_t dname_len = 0; + int e = sldns_str2wire_rr_buf(str, rr, &len, &dname_len, 3600, + NULL, 0, NULL, 0); + if(e) { + log_err("error parsing local-data at %d: '%s': %s", + LDNS_WIREPARSE_OFFSET(e), str, + sldns_get_errorstr_parse(e)); + return 0; + } + *nm = memdup(rr, dname_len); + if(!*nm) { + log_err("out of memory"); + return 0; + } + *dclass = sldns_wirerr_get_class(rr, len, dname_len); + *type = sldns_wirerr_get_type(rr, len, dname_len); + *ttl = (time_t)sldns_wirerr_get_ttl(rr, len, dname_len); + *rdata = sldns_wirerr_get_rdatawl(rr, len, dname_len); + *rdata_len = sldns_wirerr_get_rdatalen(rr, len, dname_len)+2; + return 1; +} + +/** return name and class of rr; parses string */ +static int +get_rr_nameclass(const char* str, uint8_t** nm, uint16_t* dclass) +{ + uint8_t rr[LDNS_RR_BUF_SIZE]; + size_t len = 
sizeof(rr), dname_len = 0; + int s = sldns_str2wire_rr_buf(str, rr, &len, &dname_len, 3600, + NULL, 0, NULL, 0); + if(s != 0) { + log_err("error parsing local-data at %d '%s': %s", + LDNS_WIREPARSE_OFFSET(s), str, + sldns_get_errorstr_parse(s)); + return 0; + } + *nm = memdup(rr, dname_len); + *dclass = sldns_wirerr_get_class(rr, len, dname_len); + if(!*nm) { + log_err("out of memory"); + return 0; + } + return 1; +} + +/** + * Find an rrset in local data structure. + * @param data: local data domain name structure. + * @param type: type to look for (host order). + * @return rrset pointer or NULL if not found. + */ +static struct local_rrset* +local_data_find_type(struct local_data* data, uint16_t type) +{ + struct local_rrset* p; + type = htons(type); + for(p = data->rrsets; p; p = p->next) { + if(p->rrset->rk.type == type) + return p; + } + return NULL; +} + +/** check for RR duplicates */ +static int +rr_is_duplicate(struct packed_rrset_data* pd, uint8_t* rdata, size_t rdata_len) +{ + size_t i; + for(i=0; i<pd->count; i++) { + if(pd->rr_len[i] == rdata_len && + memcmp(pd->rr_data[i], rdata, rdata_len) == 0) + return 1; + } + return 0; +} + +/** new local_rrset */ +static struct local_rrset* +new_local_rrset(struct regional* region, struct local_data* node, + uint16_t rrtype, uint16_t rrclass) +{ + struct packed_rrset_data* pd; + struct local_rrset* rrset = (struct local_rrset*) + regional_alloc_zero(region, sizeof(*rrset)); + if(!rrset) { + log_err("out of memory"); + return NULL; + } + rrset->next = node->rrsets; + node->rrsets = rrset; + rrset->rrset = (struct ub_packed_rrset_key*) + regional_alloc_zero(region, sizeof(*rrset->rrset)); + if(!rrset->rrset) { + log_err("out of memory"); + return NULL; + } + rrset->rrset->entry.key = rrset->rrset; + pd = (struct packed_rrset_data*)regional_alloc_zero(region, + sizeof(*pd)); + if(!pd) { + log_err("out of memory"); + return NULL; + } + pd->trust = rrset_trust_prim_noglue; + pd->security = sec_status_insecure; + 
rrset->rrset->entry.data = pd; + rrset->rrset->rk.dname = node->name; + rrset->rrset->rk.dname_len = node->namelen; + rrset->rrset->rk.type = htons(rrtype); + rrset->rrset->rk.rrset_class = htons(rrclass); + return rrset; +} + +/** insert RR into RRset data structure; Wastes a couple of bytes */ +static int +insert_rr(struct regional* region, struct packed_rrset_data* pd, + uint8_t* rdata, size_t rdata_len, time_t ttl) +{ + size_t* oldlen = pd->rr_len; + time_t* oldttl = pd->rr_ttl; + uint8_t** olddata = pd->rr_data; + + /* add RR to rrset */ + pd->count++; + pd->rr_len = regional_alloc(region, sizeof(*pd->rr_len)*pd->count); + pd->rr_ttl = regional_alloc(region, sizeof(*pd->rr_ttl)*pd->count); + pd->rr_data = regional_alloc(region, sizeof(*pd->rr_data)*pd->count); + if(!pd->rr_len || !pd->rr_ttl || !pd->rr_data) { + log_err("out of memory"); + return 0; + } + if(pd->count > 1) { + memcpy(pd->rr_len+1, oldlen, + sizeof(*pd->rr_len)*(pd->count-1)); + memcpy(pd->rr_ttl+1, oldttl, + sizeof(*pd->rr_ttl)*(pd->count-1)); + memcpy(pd->rr_data+1, olddata, + sizeof(*pd->rr_data)*(pd->count-1)); + } + pd->rr_len[0] = rdata_len; + pd->rr_ttl[0] = ttl; + pd->rr_data[0] = regional_alloc_init(region, rdata, rdata_len); + if(!pd->rr_data[0]) { + log_err("out of memory"); + return 0; + } + return 1; +} + +/** find a data node by exact name */ +static struct local_data* +lz_find_node(struct local_zone* z, uint8_t* nm, size_t nmlen, int nmlabs) +{ + struct local_data key; + key.node.key = &key; + key.name = nm; + key.namelen = nmlen; + key.namelabs = nmlabs; + return (struct local_data*)rbtree_search(&z->data, &key.node); +} + +/** find a node, create it if not and all its empty nonterminal parents */ +static int +lz_find_create_node(struct local_zone* z, uint8_t* nm, size_t nmlen, + int nmlabs, struct local_data** res) +{ + struct local_data* ld = lz_find_node(z, nm, nmlen, nmlabs); + if(!ld) { + /* create a domain name to store rr. 
*/ + ld = (struct local_data*)regional_alloc_zero(z->region, + sizeof(*ld)); + if(!ld) { + log_err("out of memory adding local data"); + return 0; + } + ld->node.key = ld; + ld->name = regional_alloc_init(z->region, nm, nmlen); + if(!ld->name) { + log_err("out of memory"); + return 0; + } + ld->namelen = nmlen; + ld->namelabs = nmlabs; + if(!rbtree_insert(&z->data, &ld->node)) { + log_assert(0); /* duplicate name */ + } + /* see if empty nonterminals need to be created */ + if(nmlabs > z->namelabs) { + dname_remove_label(&nm, &nmlen); + if(!lz_find_create_node(z, nm, nmlen, nmlabs-1, res)) + return 0; + } + } + *res = ld; + return 1; +} + +/** enter data RR into auth zone */ +static int +lz_enter_rr_into_zone(struct local_zone* z, const char* rrstr) +{ + uint8_t* nm; + size_t nmlen; + int nmlabs; + struct local_data* node; + struct local_rrset* rrset; + struct packed_rrset_data* pd; + uint16_t rrtype = 0, rrclass = 0; + time_t ttl = 0; + uint8_t rr[LDNS_RR_BUF_SIZE]; + uint8_t* rdata; + size_t rdata_len; + if(!get_rr_content(rrstr, &nm, &rrtype, &rrclass, &ttl, rr, sizeof(rr), + &rdata, &rdata_len)) { + log_err("bad local-data: %s", rrstr); + return 0; + } + log_assert(z->dclass == rrclass); + if(z->type == local_zone_redirect && + query_dname_compare(z->name, nm) != 0) { + log_err("local-data in redirect zone must reside at top of zone" + ", not at %s", rrstr); + free(nm); + return 0; + } + nmlabs = dname_count_size_labels(nm, &nmlen); + if(!lz_find_create_node(z, nm, nmlen, nmlabs, &node)) { + free(nm); + return 0; + } + log_assert(node); + free(nm); + + rrset = local_data_find_type(node, rrtype); + if(!rrset) { + rrset = new_local_rrset(z->region, node, rrtype, rrclass); + if(!rrset) + return 0; + if(query_dname_compare(node->name, z->name) == 0) { + if(rrtype == LDNS_RR_TYPE_NSEC) + rrset->rrset->rk.flags = PACKED_RRSET_NSEC_AT_APEX; + if(rrtype == LDNS_RR_TYPE_SOA) + z->soa = rrset->rrset; + } + } + pd = (struct packed_rrset_data*)rrset->rrset->entry.data; + 
log_assert(rrset && pd); + + /* check for duplicate RR */ + if(rr_is_duplicate(pd, rdata, rdata_len)) { + verbose(VERB_ALGO, "ignoring duplicate RR: %s", rrstr); + return 1; + } + return insert_rr(z->region, pd, rdata, rdata_len, ttl); +} + +/** enter a data RR into auth data; a zone for it must exist */ +static int +lz_enter_rr_str(struct local_zones* zones, const char* rr) +{ + uint8_t* rr_name; + uint16_t rr_class; + size_t len; + int labs; + struct local_zone* z; + int r; + if(!get_rr_nameclass(rr, &rr_name, &rr_class)) { + log_err("bad rr %s", rr); + return 0; + } + labs = dname_count_size_labels(rr_name, &len); + lock_rw_rdlock(&zones->lock); + z = local_zones_lookup(zones, rr_name, len, labs, rr_class); + if(!z) { + lock_rw_unlock(&zones->lock); + fatal_exit("internal error: no zone for rr %s", rr); + } + lock_rw_wrlock(&z->lock); + lock_rw_unlock(&zones->lock); + free(rr_name); + r = lz_enter_rr_into_zone(z, rr); + lock_rw_unlock(&z->lock); + return r; +} + +/** parse local-zone: statements */ +static int +lz_enter_zones(struct local_zones* zones, struct config_file* cfg) +{ + struct config_str2list* p; + struct local_zone* z; + for(p = cfg->local_zones; p; p = p->next) { + if(!(z=lz_enter_zone(zones, p->str, p->str2, + LDNS_RR_CLASS_IN))) + return 0; + lock_rw_unlock(&z->lock); + } + return 1; +} + +/** lookup a zone in rbtree; exact match only; SLOW due to parse */ +static int +lz_exists(struct local_zones* zones, const char* name) +{ + struct local_zone z; + z.node.key = &z; + z.dclass = LDNS_RR_CLASS_IN; + if(!parse_dname(name, &z.name, &z.namelen, &z.namelabs)) { + log_err("bad name %s", name); + return 0; + } + lock_rw_rdlock(&zones->lock); + if(rbtree_search(&zones->ztree, &z.node)) { + lock_rw_unlock(&zones->lock); + free(z.name); + return 1; + } + lock_rw_unlock(&zones->lock); + free(z.name); + return 0; +} + +/** lookup a zone in cfg->nodefault list */ +static int +lz_nodefault(struct config_file* cfg, const char* name) +{ + struct config_strlist* 
p; + size_t len = strlen(name); + if(len == 0) return 0; + if(name[len-1] == '.') len--; + + for(p = cfg->local_zones_nodefault; p; p = p->next) { + /* compare zone name, lowercase, compare without ending . */ + if(strncasecmp(p->str, name, len) == 0 && + (strlen(p->str) == len || (strlen(p->str)==len+1 && + p->str[len] == '.'))) + return 1; + } + return 0; +} + +/** enter AS112 default zone */ +static int +add_as112_default(struct local_zones* zones, struct config_file* cfg, + const char* name) +{ + struct local_zone* z; + char str[1024]; /* known long enough */ + if(lz_exists(zones, name) || lz_nodefault(cfg, name)) + return 1; /* do not enter default content */ + if(!(z=lz_enter_zone(zones, name, "static", LDNS_RR_CLASS_IN))) + return 0; + snprintf(str, sizeof(str), "%s 10800 IN SOA localhost. " + "nobody.invalid. 1 3600 1200 604800 10800", name); + if(!lz_enter_rr_into_zone(z, str)) { + lock_rw_unlock(&z->lock); + return 0; + } + snprintf(str, sizeof(str), "%s 10800 IN NS localhost. ", name); + if(!lz_enter_rr_into_zone(z, str)) { + lock_rw_unlock(&z->lock); + return 0; + } + lock_rw_unlock(&z->lock); + return 1; +} + +/** enter default zones */ +static int +lz_enter_defaults(struct local_zones* zones, struct config_file* cfg) +{ + struct local_zone* z; + + /* this list of zones is from RFC 6303 */ + + /* block localhost level zones, first, later the LAN zones */ + + /* localhost. zone */ + if(!lz_exists(zones, "localhost.") && + !lz_nodefault(cfg, "localhost.")) { + if(!(z=lz_enter_zone(zones, "localhost.", "static", + LDNS_RR_CLASS_IN)) || + !lz_enter_rr_into_zone(z, + "localhost. 10800 IN NS localhost.") || + !lz_enter_rr_into_zone(z, + "localhost. 10800 IN SOA localhost. nobody.invalid. " + "1 3600 1200 604800 10800") || + !lz_enter_rr_into_zone(z, + "localhost. 10800 IN A 127.0.0.1") || + !lz_enter_rr_into_zone(z, + "localhost. 
10800 IN AAAA ::1")) { + log_err("out of memory adding default zone"); + if(z) { lock_rw_unlock(&z->lock); } + return 0; + } + lock_rw_unlock(&z->lock); + } + /* reverse ip4 zone */ + if(!lz_exists(zones, "127.in-addr.arpa.") && + !lz_nodefault(cfg, "127.in-addr.arpa.")) { + if(!(z=lz_enter_zone(zones, "127.in-addr.arpa.", "static", + LDNS_RR_CLASS_IN)) || + !lz_enter_rr_into_zone(z, + "127.in-addr.arpa. 10800 IN NS localhost.") || + !lz_enter_rr_into_zone(z, + "127.in-addr.arpa. 10800 IN SOA localhost. " + "nobody.invalid. 1 3600 1200 604800 10800") || + !lz_enter_rr_into_zone(z, + "1.0.0.127.in-addr.arpa. 10800 IN PTR localhost.")) { + log_err("out of memory adding default zone"); + if(z) { lock_rw_unlock(&z->lock); } + return 0; + } + lock_rw_unlock(&z->lock); + } + /* reverse ip6 zone */ + if(!lz_exists(zones, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa.") && + !lz_nodefault(cfg, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa.")) { + if(!(z=lz_enter_zone(zones, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa.", "static", + LDNS_RR_CLASS_IN)) || + !lz_enter_rr_into_zone(z, + "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. 10800 IN NS localhost.") || + !lz_enter_rr_into_zone(z, + "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. 10800 IN SOA localhost. " + "nobody.invalid. 1 3600 1200 604800 10800") || + !lz_enter_rr_into_zone(z, + "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa. 10800 IN PTR localhost.")) { + log_err("out of memory adding default zone"); + if(z) { lock_rw_unlock(&z->lock); } + return 0; + } + lock_rw_unlock(&z->lock); + } + + /* if unblock lan-zones, then do not add the zones below. + * we do add the zones above, about 127.0.0.1, because localhost is + * not on the lan. 
*/ + if(cfg->unblock_lan_zones) + return 1; + + /* block LAN level zones */ + if ( !add_as112_default(zones, cfg, "10.in-addr.arpa.") || + !add_as112_default(zones, cfg, "16.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "17.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "18.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "19.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "20.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "21.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "22.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "23.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "24.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "25.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "26.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "27.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "28.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "29.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "30.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "31.172.in-addr.arpa.") || + !add_as112_default(zones, cfg, "168.192.in-addr.arpa.") || + !add_as112_default(zones, cfg, "0.in-addr.arpa.") || + !add_as112_default(zones, cfg, "64.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "65.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "66.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "67.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "68.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "69.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "70.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "71.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "72.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "73.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "74.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "75.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "76.100.in-addr.arpa.") || + 
!add_as112_default(zones, cfg, "77.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "78.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "79.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "80.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "81.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "82.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "83.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "84.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "85.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "86.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "87.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "88.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "89.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "90.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "91.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "92.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "93.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "94.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "95.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "96.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "97.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "98.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "99.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "100.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "101.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "102.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "103.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "104.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "105.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "106.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "107.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "108.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "109.100.in-addr.arpa.") || + 
!add_as112_default(zones, cfg, "110.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "111.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "112.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "113.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "114.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "115.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "116.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "117.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "118.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "119.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "120.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "121.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "122.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "123.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "124.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "125.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "126.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "127.100.in-addr.arpa.") || + !add_as112_default(zones, cfg, "254.169.in-addr.arpa.") || + !add_as112_default(zones, cfg, "2.0.192.in-addr.arpa.") || + !add_as112_default(zones, cfg, "100.51.198.in-addr.arpa.") || + !add_as112_default(zones, cfg, "113.0.203.in-addr.arpa.") || + !add_as112_default(zones, cfg, "255.255.255.255.in-addr.arpa.") || + !add_as112_default(zones, cfg, "0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa.") || + !add_as112_default(zones, cfg, "d.f.ip6.arpa.") || + !add_as112_default(zones, cfg, "8.e.f.ip6.arpa.") || + !add_as112_default(zones, cfg, "9.e.f.ip6.arpa.") || + !add_as112_default(zones, cfg, "a.e.f.ip6.arpa.") || + !add_as112_default(zones, cfg, "b.e.f.ip6.arpa.") || + !add_as112_default(zones, cfg, "8.b.d.0.1.0.0.2.ip6.arpa.")) { + log_err("out of memory adding default zone"); + return 0; + } + return 1; +} + +/** setup parent pointers, so that a lookup can be 
done for closest match */ +static void +init_parents(struct local_zones* zones) +{ + struct local_zone* node, *prev = NULL, *p; + int m; + lock_rw_wrlock(&zones->lock); + RBTREE_FOR(node, struct local_zone*, &zones->ztree) { + lock_rw_wrlock(&node->lock); + node->parent = NULL; + if(!prev || prev->dclass != node->dclass) { + prev = node; + lock_rw_unlock(&node->lock); + continue; + } + (void)dname_lab_cmp(prev->name, prev->namelabs, node->name, + node->namelabs, &m); /* we know prev is smaller */ + /* sort order like: . com. bla.com. zwb.com. net. */ + /* find the previous, or parent-parent-parent */ + for(p = prev; p; p = p->parent) + /* looking for name with few labels, a parent */ + if(p->namelabs <= m) { + /* ==: since prev matched m, this is closest*/ + /* <: prev matches more, but is not a parent, + * this one is a (grand)parent */ + node->parent = p; + break; + } + prev = node; + lock_rw_unlock(&node->lock); + } + lock_rw_unlock(&zones->lock); +} + +/** enter implicit transparent zone for local-data: without local-zone: */ +static int +lz_setup_implicit(struct local_zones* zones, struct config_file* cfg) +{ + /* walk over all items that have no parent zone and find + * the name that covers them all (could be the root) and + * add that as a transparent zone */ + struct config_strlist* p; + int have_name = 0; + int have_other_classes = 0; + uint16_t dclass = 0; + uint8_t* nm = 0; + size_t nmlen = 0; + int nmlabs = 0; + int match = 0; /* number of labels match count */ + + init_parents(zones); /* to enable local_zones_lookup() */ + for(p = cfg->local_data; p; p = p->next) { + uint8_t* rr_name; + uint16_t rr_class; + size_t len; + int labs; + if(!get_rr_nameclass(p->str, &rr_name, &rr_class)) { + log_err("Bad local-data RR %s", p->str); + return 0; + } + labs = dname_count_size_labels(rr_name, &len); + lock_rw_rdlock(&zones->lock); + if(!local_zones_lookup(zones, rr_name, len, labs, rr_class)) { + if(!have_name) { + dclass = rr_class; + nm = rr_name; + nmlen = 
len; + nmlabs = labs; + match = labs; + have_name = 1; + } else { + int m; + if(rr_class != dclass) { + /* process other classes later */ + free(rr_name); + have_other_classes = 1; + lock_rw_unlock(&zones->lock); + continue; + } + /* find smallest shared topdomain */ + (void)dname_lab_cmp(nm, nmlabs, + rr_name, labs, &m); + free(rr_name); + if(m < match) + match = m; + } + } else free(rr_name); + lock_rw_unlock(&zones->lock); + } + if(have_name) { + uint8_t* n2; + struct local_zone* z; + /* allocate zone of smallest shared topdomain to contain em */ + n2 = nm; + dname_remove_labels(&n2, &nmlen, nmlabs - match); + n2 = memdup(n2, nmlen); + free(nm); + if(!n2) { + log_err("out of memory"); + return 0; + } + log_nametypeclass(VERB_ALGO, "implicit transparent local-zone", + n2, 0, dclass); + if(!(z=lz_enter_zone_dname(zones, n2, nmlen, match, + local_zone_transparent, dclass))) { + return 0; + } + lock_rw_unlock(&z->lock); + } + if(have_other_classes) { + /* restart to setup other class */ + return lz_setup_implicit(zones, cfg); + } + return 1; +} + +/** enter auth data */ +static int +lz_enter_data(struct local_zones* zones, struct config_file* cfg) +{ + struct config_strlist* p; + for(p = cfg->local_data; p; p = p->next) { + if(!lz_enter_rr_str(zones, p->str)) + return 0; + } + return 1; +} + +/** free memory from config */ +static void +lz_freeup_cfg(struct config_file* cfg) +{ + config_deldblstrlist(cfg->local_zones); + cfg->local_zones = NULL; + config_delstrlist(cfg->local_zones_nodefault); + cfg->local_zones_nodefault = NULL; + config_delstrlist(cfg->local_data); + cfg->local_data = NULL; +} + +int +local_zones_apply_cfg(struct local_zones* zones, struct config_file* cfg) +{ + /* create zones from zone statements. */ + if(!lz_enter_zones(zones, cfg)) { + return 0; + } + /* apply default zones+content (unless disabled, or overridden) */ + if(!lz_enter_defaults(zones, cfg)) { + return 0; + } + /* create implicit transparent zone from data. 
*/ + if(!lz_setup_implicit(zones, cfg)) { + return 0; + } + + /* setup parent ptrs for lookup during data entry */ + init_parents(zones); + /* insert local data */ + if(!lz_enter_data(zones, cfg)) { + return 0; + } + /* freeup memory from cfg struct. */ + lz_freeup_cfg(cfg); + return 1; +} + +struct local_zone* +local_zones_lookup(struct local_zones* zones, + uint8_t* name, size_t len, int labs, uint16_t dclass) +{ + rbnode_t* res = NULL; + struct local_zone *result; + struct local_zone key; + key.node.key = &key; + key.dclass = dclass; + key.name = name; + key.namelen = len; + key.namelabs = labs; + if(rbtree_find_less_equal(&zones->ztree, &key, &res)) { + /* exact */ + return (struct local_zone*)res; + } else { + /* smaller element (or no element) */ + int m; + result = (struct local_zone*)res; + if(!result || result->dclass != dclass) + return NULL; + /* count number of labels matched */ + (void)dname_lab_cmp(result->name, result->namelabs, key.name, + key.namelabs, &m); + while(result) { /* go up until qname is subdomain of zone */ + if(result->namelabs <= m) + break; + result = result->parent; + } + return result; + } +} + +struct local_zone* +local_zones_find(struct local_zones* zones, + uint8_t* name, size_t len, int labs, uint16_t dclass) +{ + struct local_zone key; + key.node.key = &key; + key.dclass = dclass; + key.name = name; + key.namelen = len; + key.namelabs = labs; + /* exact */ + return (struct local_zone*)rbtree_search(&zones->ztree, &key); +} + +/** print all RRsets in local zone */ +static void +local_zone_out(struct local_zone* z) +{ + struct local_data* d; + struct local_rrset* p; + RBTREE_FOR(d, struct local_data*, &z->data) { + for(p = d->rrsets; p; p = p->next) { + log_nametypeclass(0, "rrset", d->name, + ntohs(p->rrset->rk.type), + ntohs(p->rrset->rk.rrset_class)); + } + } +} + +void local_zones_print(struct local_zones* zones) +{ + struct local_zone* z; + lock_rw_rdlock(&zones->lock); + log_info("number of auth zones %u", 
(unsigned)zones->ztree.count); + RBTREE_FOR(z, struct local_zone*, &zones->ztree) { + lock_rw_rdlock(&z->lock); + switch(z->type) { + case local_zone_deny: + log_nametypeclass(0, "deny zone", + z->name, 0, z->dclass); + break; + case local_zone_refuse: + log_nametypeclass(0, "refuse zone", + z->name, 0, z->dclass); + break; + case local_zone_redirect: + log_nametypeclass(0, "redirect zone", + z->name, 0, z->dclass); + break; + case local_zone_transparent: + log_nametypeclass(0, "transparent zone", + z->name, 0, z->dclass); + break; + case local_zone_typetransparent: + log_nametypeclass(0, "typetransparent zone", + z->name, 0, z->dclass); + break; + case local_zone_static: + log_nametypeclass(0, "static zone", + z->name, 0, z->dclass); + break; + default: + log_nametypeclass(0, "badtyped zone", + z->name, 0, z->dclass); + break; + } + local_zone_out(z); + lock_rw_unlock(&z->lock); + } + lock_rw_unlock(&zones->lock); +} + +/** encode answer consisting of 1 rrset */ +static int +local_encode(struct query_info* qinfo, struct edns_data* edns, + sldns_buffer* buf, struct regional* temp, + struct ub_packed_rrset_key* rrset, int ansec, int rcode) +{ + struct reply_info rep; + uint16_t udpsize; + /* make answer with time=0 for fixed TTL values */ + memset(&rep, 0, sizeof(rep)); + rep.flags = (uint16_t)((BIT_QR | BIT_AA | BIT_RA) | rcode); + rep.qdcount = 1; + if(ansec) + rep.an_numrrsets = 1; + else rep.ns_numrrsets = 1; + rep.rrset_count = 1; + rep.rrsets = &rrset; + udpsize = edns->udp_size; + edns->edns_version = EDNS_ADVERTISED_VERSION; + edns->udp_size = EDNS_ADVERTISED_SIZE; + edns->ext_rcode = 0; + edns->bits &= EDNS_DO; + if(!reply_info_answer_encode(qinfo, &rep, + *(uint16_t*)sldns_buffer_begin(buf), + sldns_buffer_read_u16_at(buf, 2), + buf, 0, 0, temp, udpsize, edns, + (int)(edns->bits&EDNS_DO), 0)) + error_encode(buf, (LDNS_RCODE_SERVFAIL|BIT_AA), qinfo, + *(uint16_t*)sldns_buffer_begin(buf), + sldns_buffer_read_u16_at(buf, 2), edns); + return 1; +} + +/** 
answer local data match */ +static int +local_data_answer(struct local_zone* z, struct query_info* qinfo, + struct edns_data* edns, sldns_buffer* buf, struct regional* temp, + int labs, struct local_data** ldp) +{ + struct local_data key; + struct local_data* ld; + struct local_rrset* lr; + key.node.key = &key; + key.name = qinfo->qname; + key.namelen = qinfo->qname_len; + key.namelabs = labs; + if(z->type == local_zone_redirect) { + key.name = z->name; + key.namelen = z->namelen; + key.namelabs = z->namelabs; + } + ld = (struct local_data*)rbtree_search(&z->data, &key.node); + *ldp = ld; + if(!ld) { + return 0; + } + lr = local_data_find_type(ld, qinfo->qtype); + if(!lr) + return 0; + if(z->type == local_zone_redirect) { + /* convert rrset name to query name; like a wildcard */ + struct ub_packed_rrset_key r = *lr->rrset; + r.rk.dname = qinfo->qname; + r.rk.dname_len = qinfo->qname_len; + return local_encode(qinfo, edns, buf, temp, &r, 1, + LDNS_RCODE_NOERROR); + } + return local_encode(qinfo, edns, buf, temp, lr->rrset, 1, + LDNS_RCODE_NOERROR); +} + +/** + * answer in case where no exact match is found + * @param z: zone for query + * @param qinfo: query + * @param edns: edns from query + * @param buf: buffer for answer. + * @param temp: temp region for encoding + * @param ld: local data, if NULL, no such name exists in localdata. + * @return 1 if a reply is to be sent, 0 if not. + */ +static int +lz_zone_answer(struct local_zone* z, struct query_info* qinfo, + struct edns_data* edns, sldns_buffer* buf, struct regional* temp, + struct local_data* ld) +{ + if(z->type == local_zone_deny) { + /** no reply at all, signal caller by clearing buffer. 
*/ + sldns_buffer_clear(buf); + sldns_buffer_flip(buf); + return 1; + } else if(z->type == local_zone_refuse) { + error_encode(buf, (LDNS_RCODE_REFUSED|BIT_AA), qinfo, + *(uint16_t*)sldns_buffer_begin(buf), + sldns_buffer_read_u16_at(buf, 2), edns); + return 1; + } else if(z->type == local_zone_static || + z->type == local_zone_redirect) { + /* for static, reply nodata or nxdomain + * for redirect, reply nodata */ + /* no additional section processing, + * cname, dname or wildcard processing, + * or using closest match for NSEC. + * or using closest match for returning delegation downwards + */ + int rcode = ld?LDNS_RCODE_NOERROR:LDNS_RCODE_NXDOMAIN; + if(z->soa) + return local_encode(qinfo, edns, buf, temp, + z->soa, 0, rcode); + error_encode(buf, (rcode|BIT_AA), qinfo, + *(uint16_t*)sldns_buffer_begin(buf), + sldns_buffer_read_u16_at(buf, 2), edns); + return 1; + } else if(z->type == local_zone_typetransparent) { + /* no NODATA or NXDOMAINS for this zone type */ + return 0; + } + /* else z->type == local_zone_transparent */ + + /* if the zone is transparent and the name exists, but the type + * does not, then we should make this noerror/nodata */ + if(ld && ld->rrsets) { + int rcode = LDNS_RCODE_NOERROR; + if(z->soa) + return local_encode(qinfo, edns, buf, temp, + z->soa, 0, rcode); + error_encode(buf, (rcode|BIT_AA), qinfo, + *(uint16_t*)sldns_buffer_begin(buf), + sldns_buffer_read_u16_at(buf, 2), edns); + return 1; + } + + /* stop here, and resolve further on */ + return 0; +} + +int +local_zones_answer(struct local_zones* zones, struct query_info* qinfo, + struct edns_data* edns, sldns_buffer* buf, struct regional* temp) +{ + /* see if query is covered by a zone, + * if so: - try to match (exact) local data + * - look at zone type for negative response. 
*/ + int labs = dname_count_labels(qinfo->qname); + struct local_data* ld; + struct local_zone* z; + int r; + lock_rw_rdlock(&zones->lock); + z = local_zones_lookup(zones, qinfo->qname, + qinfo->qname_len, labs, qinfo->qclass); + if(!z) { + lock_rw_unlock(&zones->lock); + return 0; + } + lock_rw_rdlock(&z->lock); + lock_rw_unlock(&zones->lock); + + if(local_data_answer(z, qinfo, edns, buf, temp, labs, &ld)) { + lock_rw_unlock(&z->lock); + return 1; + } + r = lz_zone_answer(z, qinfo, edns, buf, temp, ld); + lock_rw_unlock(&z->lock); + return r; +} + +const char* local_zone_type2str(enum localzone_type t) +{ + switch(t) { + case local_zone_deny: return "deny"; + case local_zone_refuse: return "refuse"; + case local_zone_redirect: return "redirect"; + case local_zone_transparent: return "transparent"; + case local_zone_typetransparent: return "typetransparent"; + case local_zone_static: return "static"; + case local_zone_nodefault: return "nodefault"; + } + return "badtyped"; +} + +int local_zone_str2type(const char* type, enum localzone_type* t) +{ + if(strcmp(type, "deny") == 0) + *t = local_zone_deny; + else if(strcmp(type, "refuse") == 0) + *t = local_zone_refuse; + else if(strcmp(type, "static") == 0) + *t = local_zone_static; + else if(strcmp(type, "transparent") == 0) + *t = local_zone_transparent; + else if(strcmp(type, "typetransparent") == 0) + *t = local_zone_typetransparent; + else if(strcmp(type, "redirect") == 0) + *t = local_zone_redirect; + else return 0; + return 1; +} + +/** iterate over the kiddies of the given name and set their parent ptr */ +static void +set_kiddo_parents(struct local_zone* z, struct local_zone* match, + struct local_zone* newp) +{ + /* both zones and z are locked already */ + /* in the sorted rbtree, the kiddies of z are located after z */ + /* z must be present in the tree */ + struct local_zone* p = z; + p = (struct local_zone*)rbtree_next(&p->node); + while(p!=(struct local_zone*)RBTREE_NULL && + p->dclass == z->dclass && 
dname_strict_subdomain(p->name, + p->namelabs, z->name, z->namelabs)) { + /* update parent ptr */ + /* only when matches with existing parent pointer, so that + * deeper child structures are not touched, i.e. + * update of x, and a.x, b.x, f.b.x, g.b.x, c.x, y + * gets to update a.x, b.x and c.x */ + lock_rw_wrlock(&p->lock); + if(p->parent == match) + p->parent = newp; + lock_rw_unlock(&p->lock); + p = (struct local_zone*)rbtree_next(&p->node); + } +} + +struct local_zone* local_zones_add_zone(struct local_zones* zones, + uint8_t* name, size_t len, int labs, uint16_t dclass, + enum localzone_type tp) +{ + /* create */ + struct local_zone* z = local_zone_create(name, len, labs, tp, dclass); + if(!z) return NULL; + lock_rw_wrlock(&z->lock); + + /* find the closest parent */ + z->parent = local_zones_find(zones, name, len, labs, dclass); + + /* insert into the tree */ + if(!rbtree_insert(&zones->ztree, &z->node)) { + /* duplicate entry! */ + lock_rw_unlock(&z->lock); + local_zone_delete(z); + log_err("internal: duplicate entry in local_zones_add_zone"); + return NULL; + } + + /* set parent pointers right */ + set_kiddo_parents(z, z->parent, z); + + lock_rw_unlock(&z->lock); + return z; +} + +void local_zones_del_zone(struct local_zones* zones, struct local_zone* z) +{ + /* fix up parents in tree */ + lock_rw_wrlock(&z->lock); + set_kiddo_parents(z, z, z->parent); + + /* remove from tree */ + (void)rbtree_delete(&zones->ztree, z); + + /* delete the zone */ + lock_rw_unlock(&z->lock); + local_zone_delete(z); +} + +int +local_zones_add_RR(struct local_zones* zones, const char* rr) +{ + uint8_t* rr_name; + uint16_t rr_class; + size_t len; + int labs; + struct local_zone* z; + int r; + if(!get_rr_nameclass(rr, &rr_name, &rr_class)) { + return 0; + } + labs = dname_count_size_labels(rr_name, &len); + /* could first try readlock then get writelock if zone does not exist, + * but we do not add enough RRs (from multiple threads) to optimize */ + lock_rw_wrlock(&zones->lock); 
+ z = local_zones_lookup(zones, rr_name, len, labs, rr_class); + if(!z) { + z = local_zones_add_zone(zones, rr_name, len, labs, rr_class, + local_zone_transparent); + if(!z) { + lock_rw_unlock(&zones->lock); + return 0; + } + } else { + free(rr_name); + } + lock_rw_wrlock(&z->lock); + lock_rw_unlock(&zones->lock); + r = lz_enter_rr_into_zone(z, rr); + lock_rw_unlock(&z->lock); + return r; +} + +/** returns true if the node is terminal so no deeper domain names exist */ +static int +is_terminal(struct local_data* d) +{ + /* for empty nonterminals, the deeper domain names are sorted + * right after them, so simply check the next name in the tree + */ + struct local_data* n = (struct local_data*)rbtree_next(&d->node); + if(n == (struct local_data*)RBTREE_NULL) + return 1; /* last in tree, no deeper node */ + if(dname_strict_subdomain(n->name, n->namelabs, d->name, d->namelabs)) + return 0; /* there is a deeper node */ + return 1; +} + +/** delete empty terminals from tree when final data is deleted */ +static void +del_empty_term(struct local_zone* z, struct local_data* d, + uint8_t* name, size_t len, int labs) +{ + while(d && d->rrsets == NULL && is_terminal(d)) { + /* is this empty nonterminal? 
delete */ + /* note, no memory recycling in zone region */ + (void)rbtree_delete(&z->data, d); + + /* go up and to the next label */ + if(dname_is_root(name)) + return; + dname_remove_label(&name, &len); + labs--; + d = lz_find_node(z, name, len, labs); + } +} + +void local_zones_del_data(struct local_zones* zones, + uint8_t* name, size_t len, int labs, uint16_t dclass) +{ + /* find zone */ + struct local_zone* z; + struct local_data* d; + lock_rw_rdlock(&zones->lock); + z = local_zones_lookup(zones, name, len, labs, dclass); + if(!z) { + /* no such zone, we're done */ + lock_rw_unlock(&zones->lock); + return; + } + lock_rw_wrlock(&z->lock); + lock_rw_unlock(&zones->lock); + + /* find the domain */ + d = lz_find_node(z, name, len, labs); + if(d) { + /* no memory recycling for zone deletions ... */ + d->rrsets = NULL; + /* did we delete the soa record ? */ + if(query_dname_compare(d->name, z->name) == 0) + z->soa = NULL; + + /* cleanup the empty nonterminals for this name */ + del_empty_term(z, d, name, len, labs); + } + + lock_rw_unlock(&z->lock); +} diff --git a/external/unbound/services/localzone.h b/external/unbound/services/localzone.h new file mode 100644 index 000000000..788fbfb3b --- /dev/null +++ b/external/unbound/services/localzone.h @@ -0,0 +1,317 @@ +/* + * services/localzone.h - local zones authority service. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains functions to enable local zone authority service. + */ + +#ifndef SERVICES_LOCALZONE_H +#define SERVICES_LOCALZONE_H +#include "util/rbtree.h" +#include "util/locks.h" +struct ub_packed_rrset_key; +struct regional; +struct config_file; +struct edns_data; +struct query_info; +struct sldns_buffer; + +/** + * Local zone type + * This type determines processing for queries that did not match + * local-data directly. + */ +enum localzone_type { + /** drop query */ + local_zone_deny = 0, + /** answer with error */ + local_zone_refuse, + /** answer nxdomain or nodata */ + local_zone_static, + /** resolve normally */ + local_zone_transparent, + /** do not block types at localdata names */ + local_zone_typetransparent, + /** answer with data at zone apex */ + local_zone_redirect, + /** remove default AS112 blocking contents for zone + * nodefault is used in config not during service. 
*/ + local_zone_nodefault +}; + +/** + * Authoritative local zones storage, shared. + */ +struct local_zones { + /** lock on the localzone tree */ + lock_rw_t lock; + /** rbtree of struct local_zone */ + rbtree_t ztree; +}; + +/** + * Local zone. A locally served authoritative zone. + */ +struct local_zone { + /** rbtree node, key is name and class */ + rbnode_t node; + /** parent zone, if any. */ + struct local_zone* parent; + + /** zone name, in uncompressed wireformat */ + uint8_t* name; + /** length of zone name */ + size_t namelen; + /** number of labels in zone name */ + int namelabs; + /** the class of this zone. + * uses 'dclass' to not conflict with c++ keyword class. */ + uint16_t dclass; + + /** lock on the data in the structure + * For the node, parent, name, namelen, namelabs, dclass, you + * need to also hold the zones_tree lock to change them (or to + * delete this zone) */ + lock_rw_t lock; + + /** how to process zone */ + enum localzone_type type; + + /** in this region the zone's data is allocated. + * the struct local_zone itself is malloced. */ + struct regional* region; + /** local data for this zone + * rbtree of struct local_data */ + rbtree_t data; + /** if data contains zone apex SOA data, this is a ptr to it. */ + struct ub_packed_rrset_key* soa; +}; + +/** + * Local data. One domain name, and the RRs to go with it. + */ +struct local_data { + /** rbtree node, key is name only */ + rbnode_t node; + /** domain name */ + uint8_t* name; + /** length of name */ + size_t namelen; + /** number of labels in name */ + int namelabs; + /** the data rrsets, with different types, linked list. + * If this list is NULL, the node is an empty non-terminal. */ + struct local_rrset* rrsets; +}; + +/** + * A local data RRset + */ +struct local_rrset { + /** next in list */ + struct local_rrset* next; + /** RRset data item */ + struct ub_packed_rrset_key* rrset; +}; + +/** + * Create local zones storage + * @return new struct or NULL on error. 
+ */ +struct local_zones* local_zones_create(void); + +/** + * Delete local zones storage + * @param zones: to delete. + */ +void local_zones_delete(struct local_zones* zones); + +/** + * Apply config settings; setup the local authoritative data. + * Takes care of locking. + * @param zones: is set up. + * @param cfg: config data. + * @return false on error. + */ +int local_zones_apply_cfg(struct local_zones* zones, struct config_file* cfg); + +/** + * Compare two local_zone entries in rbtree. Sort hierarchical but not + * canonical + * @param z1: zone 1 + * @param z2: zone 2 + * @return: -1, 0, +1 comparison value. + */ +int local_zone_cmp(const void* z1, const void* z2); + +/** + * Compare two local_data entries in rbtree. Sort canonical. + * @param d1: data 1 + * @param d2: data 2 + * @return: -1, 0, +1 comparison value. + */ +int local_data_cmp(const void* d1, const void* d2); + +/** + * Delete one zone + * @param z: to delete. + */ +void local_zone_delete(struct local_zone* z); + +/** + * Lookup zone that contains the given name, class. + * User must lock the tree or result zone. + * @param zones: the zones tree + * @param name: dname to lookup + * @param len: length of name. + * @param labs: labelcount of name. + * @param dclass: class to lookup. + * @return closest local_zone or NULL if no covering zone is found. + */ +struct local_zone* local_zones_lookup(struct local_zones* zones, + uint8_t* name, size_t len, int labs, uint16_t dclass); + +/** + * Debug helper. Print all zones + * Takes care of locking. + * @param zones: the zones tree + */ +void local_zones_print(struct local_zones* zones); + +/** + * Answer authoritatively for local zones. + * Takes care of locking. + * @param zones: the stored zones (shared, read only). + * @param qinfo: query info (parsed). + * @param edns: edns info (parsed). + * @param buf: buffer with query ID and flags, also for reply. + * @param temp: temporary storage region. + * @return true if answer is in buffer. 
false if query is not answered + * by authority data. If the reply should be dropped altogether, the return + * value is true, but the buffer is cleared (empty). + */ +int local_zones_answer(struct local_zones* zones, struct query_info* qinfo, + struct edns_data* edns, struct sldns_buffer* buf, struct regional* temp); + +/** + * Parse the string into localzone type. + * + * @param str: string to parse + * @param t: local zone type returned here. + * @return 0 on parse error. + */ +int local_zone_str2type(const char* str, enum localzone_type* t); + +/** + * Print localzone type to a string. Pointer to a constant string. + * + * @param t: local zone type. + * @return constant string that describes type. + */ +const char* local_zone_type2str(enum localzone_type t); + +/** + * Find the zone with exactly the given name and class. + * User must lock the tree or result zone. + * @param zones: the zones tree + * @param name: dname to lookup + * @param len: length of name. + * @param labs: labelcount of name. + * @param dclass: class to lookup. + * @return the exact local_zone or NULL. + */ +struct local_zone* local_zones_find(struct local_zones* zones, + uint8_t* name, size_t len, int labs, uint16_t dclass); + +/** + * Add a new zone. Caller must hold the zones lock. + * Adjusts the other zones as well (parent pointers) after insertion. + * The zone must NOT exist (returns NULL and logs error). + * @param zones: the zones tree + * @param name: dname to add + * @param len: length of name. + * @param labs: labelcount of name. + * @param dclass: class to add. + * @param tp: type. + * @return local_zone or NULL on error, caller must printout memory error. + */ +struct local_zone* local_zones_add_zone(struct local_zones* zones, + uint8_t* name, size_t len, int labs, uint16_t dclass, + enum localzone_type tp); + +/** + * Delete a zone. Caller must hold the zones lock. + * Adjusts the other zones as well (parent pointers) after deletion. 
+ * @param zones: the zones tree + * @param zone: the zone to delete from tree. Also deletes zone from memory. + */ +void local_zones_del_zone(struct local_zones* zones, struct local_zone* zone); + +/** + * Add RR data into the localzone data. + * Looks up the zone, if no covering zone, a transparent zone with the + * name of the RR is created. + * @param zones: the zones tree. Not locked by caller. + * @param rr: string with one RR. + * @return false on failure. + */ +int local_zones_add_RR(struct local_zones* zones, const char* rr); + +/** + * Remove data from domain name in the tree. + * All types are removed. No effect if zone or name does not exist. + * @param zones: zones tree. + * @param name: dname to remove + * @param len: length of name. + * @param labs: labelcount of name. + * @param dclass: class to remove. + */ +void local_zones_del_data(struct local_zones* zones, + uint8_t* name, size_t len, int labs, uint16_t dclass); + + +/** + * Form wireformat from text format domain name. + * @param str: the domain name in text "www.example.com" + * @param res: resulting wireformat is stored here with malloc. + * @param len: length of resulting wireformat. + * @param labs: number of labels in resulting wireformat. + * @return false on error, syntax or memory. Also logged. + */ +int parse_dname(const char* str, uint8_t** res, size_t* len, int* labs); + +#endif /* SERVICES_LOCALZONE_H */ diff --git a/external/unbound/services/mesh.c b/external/unbound/services/mesh.c new file mode 100644 index 000000000..bc711d9b3 --- /dev/null +++ b/external/unbound/services/mesh.c @@ -0,0 +1,1209 @@ +/* + * services/mesh.c - deal with mesh of query states and handle events for that. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains functions to assist in dealing with a mesh of + * query states. This mesh is supposed to be thread-specific. + * It consists of query states (per qname, qtype, qclass) and connections + * between query states and the super and subquery states, and replies to + * send back to clients. 
/**
 * Subtract timers: d = end - start.
 * A borrow from the seconds is taken when end has fewer microseconds
 * than start.
 * @param d: result.
 * @param end: later time.
 * @param start: earlier time.
 */
static void
timeval_subtract(struct timeval* d, const struct timeval* end, const struct timeval* start)
{
#ifndef S_SPLINT_S
	time_t end_usec = end->tv_usec;
	d->tv_sec = end->tv_sec - start->tv_sec;
	if(end_usec < start->tv_usec) {
		end_usec += 1000000;
		d->tv_sec--;
	}
	d->tv_usec = end_usec - start->tv_usec;
#endif
}

/**
 * Add timers: d += add.
 * @param d: accumulator, updated in place.
 * @param add: value to add.
 */
static void
timeval_add(struct timeval* d, const struct timeval* add)
{
#ifndef S_SPLINT_S
	d->tv_sec += add->tv_sec;
	d->tv_usec += add->tv_usec;
	/* carry on >=, a microsecond count of exactly one million is a
	 * whole second and must not stay in tv_usec */
	if(d->tv_usec >= 1000000) {
		d->tv_usec -= 1000000;
		d->tv_sec++;
	}
#endif
}

/**
 * Divide a sum of timers to get the average.
 * @param avg: result; set to zero when d is 0.
 * @param sum: sum of the timers.
 * @param d: number of timers in the sum.
 */
static void
timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d)
{
#ifndef S_SPLINT_S
	size_t leftover;
	if(d == 0) {
		avg->tv_sec = 0;
		avg->tv_usec = 0;
		return;
	}
	avg->tv_sec = sum->tv_sec / d;
	avg->tv_usec = sum->tv_usec / d;
	/* handle fraction from seconds divide */
	leftover = sum->tv_sec - avg->tv_sec*d;
	avg->tv_usec += (leftover*1000000)/d;
#endif
}

/**
 * Compare time values.
 * @param x: first time.
 * @param y: second time.
 * @return 1 if x is smaller than or equal to y, 0 otherwise.
 */
static int
timeval_smaller(const struct timeval* x, const struct timeval* y)
{
#ifndef S_SPLINT_S
	if(x->tv_sec < y->tv_sec)
		return 1;
	else if(x->tv_sec == y->tv_sec) {
		if(x->tv_usec <= y->tv_usec)
			return 1;
		else	return 0;
	}
	else	return 0;
#endif
}
struct mesh_state* b = (struct mesh_state*)bp; + + if(a->s.is_priming && !b->s.is_priming) + return -1; + if(!a->s.is_priming && b->s.is_priming) + return 1; + + if((a->s.query_flags&BIT_RD) && !(b->s.query_flags&BIT_RD)) + return -1; + if(!(a->s.query_flags&BIT_RD) && (b->s.query_flags&BIT_RD)) + return 1; + + if((a->s.query_flags&BIT_CD) && !(b->s.query_flags&BIT_CD)) + return -1; + if(!(a->s.query_flags&BIT_CD) && (b->s.query_flags&BIT_CD)) + return 1; + + return query_info_compare(&a->s.qinfo, &b->s.qinfo); +} + +int +mesh_state_ref_compare(const void* ap, const void* bp) +{ + struct mesh_state_ref* a = (struct mesh_state_ref*)ap; + struct mesh_state_ref* b = (struct mesh_state_ref*)bp; + return mesh_state_compare(a->s, b->s); +} + +struct mesh_area* +mesh_create(struct module_stack* stack, struct module_env* env) +{ + struct mesh_area* mesh = calloc(1, sizeof(struct mesh_area)); + if(!mesh) { + log_err("mesh area alloc: out of memory"); + return NULL; + } + mesh->histogram = timehist_setup(); + mesh->qbuf_bak = sldns_buffer_new(env->cfg->msg_buffer_size); + if(!mesh->histogram || !mesh->qbuf_bak) { + free(mesh); + log_err("mesh area alloc: out of memory"); + return NULL; + } + mesh->mods = *stack; + mesh->env = env; + rbtree_init(&mesh->run, &mesh_state_compare); + rbtree_init(&mesh->all, &mesh_state_compare); + mesh->num_reply_addrs = 0; + mesh->num_reply_states = 0; + mesh->num_detached_states = 0; + mesh->num_forever_states = 0; + mesh->stats_jostled = 0; + mesh->stats_dropped = 0; + mesh->max_reply_states = env->cfg->num_queries_per_thread; + mesh->max_forever_states = (mesh->max_reply_states+1)/2; +#ifndef S_SPLINT_S + mesh->jostle_max.tv_sec = (time_t)(env->cfg->jostle_time / 1000); + mesh->jostle_max.tv_usec = (time_t)((env->cfg->jostle_time % 1000) + *1000); +#endif + return mesh; +} + +/** help mesh delete delete mesh states */ +static void +mesh_delete_helper(rbnode_t* n) +{ + struct mesh_state* mstate = (struct mesh_state*)n->key; + /* perform a 
full delete, not only 'cleanup' routine, + * because other callbacks expect a clean state in the mesh. + * For 're-entrant' calls */ + mesh_state_delete(&mstate->s); + /* but because these delete the items from the tree, postorder + * traversal and rbtree rebalancing do not work together */ +} + +void +mesh_delete(struct mesh_area* mesh) +{ + if(!mesh) + return; + /* free all query states */ + while(mesh->all.count) + mesh_delete_helper(mesh->all.root); + timehist_delete(mesh->histogram); + sldns_buffer_free(mesh->qbuf_bak); + free(mesh); +} + +void +mesh_delete_all(struct mesh_area* mesh) +{ + /* free all query states */ + while(mesh->all.count) + mesh_delete_helper(mesh->all.root); + mesh->stats_dropped += mesh->num_reply_addrs; + /* clear mesh area references */ + rbtree_init(&mesh->run, &mesh_state_compare); + rbtree_init(&mesh->all, &mesh_state_compare); + mesh->num_reply_addrs = 0; + mesh->num_reply_states = 0; + mesh->num_detached_states = 0; + mesh->num_forever_states = 0; + mesh->forever_first = NULL; + mesh->forever_last = NULL; + mesh->jostle_first = NULL; + mesh->jostle_last = NULL; +} + +int mesh_make_new_space(struct mesh_area* mesh, sldns_buffer* qbuf) +{ + struct mesh_state* m = mesh->jostle_first; + /* free space is available */ + if(mesh->num_reply_states < mesh->max_reply_states) + return 1; + /* try to kick out a jostle-list item */ + if(m && m->reply_list && m->list_select == mesh_jostle_list) { + /* how old is it? 
*/ + struct timeval age; + timeval_subtract(&age, mesh->env->now_tv, + &m->reply_list->start_time); + if(timeval_smaller(&mesh->jostle_max, &age)) { + /* its a goner */ + log_nametypeclass(VERB_ALGO, "query jostled out to " + "make space for a new one", + m->s.qinfo.qname, m->s.qinfo.qtype, + m->s.qinfo.qclass); + /* backup the query */ + if(qbuf) sldns_buffer_copy(mesh->qbuf_bak, qbuf); + /* notify supers */ + if(m->super_set.count > 0) { + verbose(VERB_ALGO, "notify supers of failure"); + m->s.return_msg = NULL; + m->s.return_rcode = LDNS_RCODE_SERVFAIL; + mesh_walk_supers(mesh, m); + } + mesh->stats_jostled ++; + mesh_state_delete(&m->s); + /* restore the query - note that the qinfo ptr to + * the querybuffer is then correct again. */ + if(qbuf) sldns_buffer_copy(qbuf, mesh->qbuf_bak); + return 1; + } + } + /* no space for new item */ + return 0; +} + +void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, + uint16_t qflags, struct edns_data* edns, struct comm_reply* rep, + uint16_t qid) +{ + /* do not use CD flag from user for mesh state, we want the CD-query + * to receive validation anyway, to protect out cache contents and + * avoid bad-data in this cache that a downstream validator cannot + * remove from this cache */ + struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0); + int was_detached = 0; + int was_noreply = 0; + int added = 0; + /* does this create a new reply state? */ + if(!s || s->list_select == mesh_no_list) { + if(!mesh_make_new_space(mesh, rep->c->buffer)) { + verbose(VERB_ALGO, "Too many queries. dropping " + "incoming query."); + comm_point_drop_reply(rep); + mesh->stats_dropped ++; + return; + } + /* for this new reply state, the reply address is free, + * so the limit of reply addresses does not stop reply states*/ + } else { + /* protect our memory usage from storing reply addresses */ + if(mesh->num_reply_addrs > mesh->max_reply_states*16) { + verbose(VERB_ALGO, "Too many requests queued. 
" + "dropping incoming query."); + mesh->stats_dropped++; + comm_point_drop_reply(rep); + return; + } + } + /* see if it already exists, if not, create one */ + if(!s) { +#ifdef UNBOUND_DEBUG + struct rbnode_t* n; +#endif + s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0); + if(!s) { + log_err("mesh_state_create: out of memory; SERVFAIL"); + error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL, + qinfo, qid, qflags, edns); + comm_point_send_reply(rep); + return; + } +#ifdef UNBOUND_DEBUG + n = +#else + (void) +#endif + rbtree_insert(&mesh->all, &s->node); + log_assert(n != NULL); + /* set detached (it is now) */ + mesh->num_detached_states++; + added = 1; + } + if(!s->reply_list && !s->cb_list && s->super_set.count == 0) + was_detached = 1; + if(!s->reply_list && !s->cb_list) + was_noreply = 1; + /* add reply to s */ + if(!mesh_state_add_reply(s, edns, rep, qid, qflags, qinfo->qname)) { + log_err("mesh_new_client: out of memory; SERVFAIL"); + error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL, + qinfo, qid, qflags, edns); + comm_point_send_reply(rep); + if(added) + mesh_state_delete(&s->s); + return; + } + /* update statistics */ + if(was_detached) { + log_assert(mesh->num_detached_states > 0); + mesh->num_detached_states--; + } + if(was_noreply) { + mesh->num_reply_states ++; + } + mesh->num_reply_addrs++; + if(s->list_select == mesh_no_list) { + /* move to either the forever or the jostle_list */ + if(mesh->num_forever_states < mesh->max_forever_states) { + mesh->num_forever_states ++; + mesh_list_insert(s, &mesh->forever_first, + &mesh->forever_last); + s->list_select = mesh_forever_list; + } else { + mesh_list_insert(s, &mesh->jostle_first, + &mesh->jostle_last); + s->list_select = mesh_jostle_list; + } + } + if(added) + mesh_run(mesh, s, module_event_new, NULL); +} + +int +mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, + uint16_t qflags, struct edns_data* edns, sldns_buffer* buf, + uint16_t qid, mesh_cb_func_t cb, void* cb_arg) +{ + 
struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0); + int was_detached = 0; + int was_noreply = 0; + int added = 0; + /* there are no limits on the number of callbacks */ + + /* see if it already exists, if not, create one */ + if(!s) { +#ifdef UNBOUND_DEBUG + struct rbnode_t* n; +#endif + s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0); + if(!s) { + return 0; + } +#ifdef UNBOUND_DEBUG + n = +#else + (void) +#endif + rbtree_insert(&mesh->all, &s->node); + log_assert(n != NULL); + /* set detached (it is now) */ + mesh->num_detached_states++; + added = 1; + } + if(!s->reply_list && !s->cb_list && s->super_set.count == 0) + was_detached = 1; + if(!s->reply_list && !s->cb_list) + was_noreply = 1; + /* add reply to s */ + if(!mesh_state_add_cb(s, edns, buf, cb, cb_arg, qid, qflags)) { + if(added) + mesh_state_delete(&s->s); + return 0; + } + /* update statistics */ + if(was_detached) { + log_assert(mesh->num_detached_states > 0); + mesh->num_detached_states--; + } + if(was_noreply) { + mesh->num_reply_states ++; + } + mesh->num_reply_addrs++; + if(added) + mesh_run(mesh, s, module_event_new, NULL); + return 1; +} + +void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo, + uint16_t qflags, time_t leeway) +{ + struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0); +#ifdef UNBOUND_DEBUG + struct rbnode_t* n; +#endif + /* already exists, and for a different purpose perhaps. + * if mesh_no_list, keep it that way. */ + if(s) { + /* make it ignore the cache from now on */ + if(!s->s.blacklist) + sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region); + if(s->s.prefetch_leeway < leeway) + s->s.prefetch_leeway = leeway; + return; + } + if(!mesh_make_new_space(mesh, NULL)) { + verbose(VERB_ALGO, "Too many queries. 
dropped prefetch."); + mesh->stats_dropped ++; + return; + } + s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0); + if(!s) { + log_err("prefetch mesh_state_create: out of memory"); + return; + } +#ifdef UNBOUND_DEBUG + n = +#else + (void) +#endif + rbtree_insert(&mesh->all, &s->node); + log_assert(n != NULL); + /* set detached (it is now) */ + mesh->num_detached_states++; + /* make it ignore the cache */ + sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region); + s->s.prefetch_leeway = leeway; + + if(s->list_select == mesh_no_list) { + /* move to either the forever or the jostle_list */ + if(mesh->num_forever_states < mesh->max_forever_states) { + mesh->num_forever_states ++; + mesh_list_insert(s, &mesh->forever_first, + &mesh->forever_last); + s->list_select = mesh_forever_list; + } else { + mesh_list_insert(s, &mesh->jostle_first, + &mesh->jostle_last); + s->list_select = mesh_jostle_list; + } + } + mesh_run(mesh, s, module_event_new, NULL); +} + +void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e, + struct comm_reply* reply, int what) +{ + enum module_ev event = module_event_reply; + e->qstate->reply = reply; + if(what != NETEVENT_NOERROR) { + event = module_event_noreply; + if(what == NETEVENT_CAPSFAIL) + event = module_event_capsfail; + } + mesh_run(mesh, e->qstate->mesh_info, event, e); +} + +struct mesh_state* +mesh_state_create(struct module_env* env, struct query_info* qinfo, + uint16_t qflags, int prime) +{ + struct regional* region = alloc_reg_obtain(env->alloc); + struct mesh_state* mstate; + int i; + if(!region) + return NULL; + mstate = (struct mesh_state*)regional_alloc(region, + sizeof(struct mesh_state)); + if(!mstate) { + alloc_reg_release(env->alloc, region); + return NULL; + } + memset(mstate, 0, sizeof(*mstate)); + mstate->node = *RBTREE_NULL; + mstate->run_node = *RBTREE_NULL; + mstate->node.key = mstate; + mstate->run_node.key = mstate; + mstate->reply_list = NULL; + mstate->list_select = mesh_no_list; + 
mstate->replies_sent = 0; + rbtree_init(&mstate->super_set, &mesh_state_ref_compare); + rbtree_init(&mstate->sub_set, &mesh_state_ref_compare); + mstate->num_activated = 0; + /* init module qstate */ + mstate->s.qinfo.qtype = qinfo->qtype; + mstate->s.qinfo.qclass = qinfo->qclass; + mstate->s.qinfo.qname_len = qinfo->qname_len; + mstate->s.qinfo.qname = regional_alloc_init(region, qinfo->qname, + qinfo->qname_len); + if(!mstate->s.qinfo.qname) { + alloc_reg_release(env->alloc, region); + return NULL; + } + /* remove all weird bits from qflags */ + mstate->s.query_flags = (qflags & (BIT_RD|BIT_CD)); + mstate->s.is_priming = prime; + mstate->s.reply = NULL; + mstate->s.region = region; + mstate->s.curmod = 0; + mstate->s.return_msg = 0; + mstate->s.return_rcode = LDNS_RCODE_NOERROR; + mstate->s.env = env; + mstate->s.mesh_info = mstate; + mstate->s.prefetch_leeway = 0; + /* init modules */ + for(i=0; i<env->mesh->mods.num; i++) { + mstate->s.minfo[i] = NULL; + mstate->s.ext_state[i] = module_state_initial; + } + return mstate; +} + +void +mesh_state_cleanup(struct mesh_state* mstate) +{ + struct mesh_area* mesh; + int i; + if(!mstate) + return; + mesh = mstate->s.env->mesh; + /* drop unsent replies */ + if(!mstate->replies_sent) { + struct mesh_reply* rep; + struct mesh_cb* cb; + for(rep=mstate->reply_list; rep; rep=rep->next) { + comm_point_drop_reply(&rep->query_reply); + mesh->num_reply_addrs--; + } + for(cb=mstate->cb_list; cb; cb=cb->next) { + fptr_ok(fptr_whitelist_mesh_cb(cb->cb)); + (*cb->cb)(cb->cb_arg, LDNS_RCODE_SERVFAIL, NULL, + sec_status_unchecked, NULL); + mesh->num_reply_addrs--; + } + } + + /* de-init modules */ + for(i=0; i<mesh->mods.num; i++) { + fptr_ok(fptr_whitelist_mod_clear(mesh->mods.mod[i]->clear)); + (*mesh->mods.mod[i]->clear)(&mstate->s, i); + mstate->s.minfo[i] = NULL; + mstate->s.ext_state[i] = module_finished; + } + alloc_reg_release(mstate->s.env->alloc, mstate->s.region); +} + +void +mesh_state_delete(struct module_qstate* qstate) 
+{ + struct mesh_area* mesh; + struct mesh_state_ref* super, ref; + struct mesh_state* mstate; + if(!qstate) + return; + mstate = qstate->mesh_info; + mesh = mstate->s.env->mesh; + mesh_detach_subs(&mstate->s); + if(mstate->list_select == mesh_forever_list) { + mesh->num_forever_states --; + mesh_list_remove(mstate, &mesh->forever_first, + &mesh->forever_last); + } else if(mstate->list_select == mesh_jostle_list) { + mesh_list_remove(mstate, &mesh->jostle_first, + &mesh->jostle_last); + } + if(!mstate->reply_list && !mstate->cb_list + && mstate->super_set.count == 0) { + log_assert(mesh->num_detached_states > 0); + mesh->num_detached_states--; + } + if(mstate->reply_list || mstate->cb_list) { + log_assert(mesh->num_reply_states > 0); + mesh->num_reply_states--; + } + ref.node.key = &ref; + ref.s = mstate; + RBTREE_FOR(super, struct mesh_state_ref*, &mstate->super_set) { + (void)rbtree_delete(&super->s->sub_set, &ref); + } + (void)rbtree_delete(&mesh->run, mstate); + (void)rbtree_delete(&mesh->all, mstate); + mesh_state_cleanup(mstate); +} + +/** helper recursive rbtree find routine */ +static int +find_in_subsub(struct mesh_state* m, struct mesh_state* tofind, size_t *c) +{ + struct mesh_state_ref* r; + if((*c)++ > MESH_MAX_SUBSUB) + return 1; + RBTREE_FOR(r, struct mesh_state_ref*, &m->sub_set) { + if(r->s == tofind || find_in_subsub(r->s, tofind, c)) + return 1; + } + return 0; +} + +/** find cycle for already looked up mesh_state */ +static int +mesh_detect_cycle_found(struct module_qstate* qstate, struct mesh_state* dep_m) +{ + struct mesh_state* cyc_m = qstate->mesh_info; + size_t counter = 0; + if(!dep_m) + return 0; + if(dep_m == cyc_m || find_in_subsub(dep_m, cyc_m, &counter)) { + if(counter > MESH_MAX_SUBSUB) + return 2; + return 1; + } + return 0; +} + +void mesh_detach_subs(struct module_qstate* qstate) +{ + struct mesh_area* mesh = qstate->env->mesh; + struct mesh_state_ref* ref, lookup; +#ifdef UNBOUND_DEBUG + struct rbnode_t* n; +#endif + 
lookup.node.key = &lookup; + lookup.s = qstate->mesh_info; + RBTREE_FOR(ref, struct mesh_state_ref*, &qstate->mesh_info->sub_set) { +#ifdef UNBOUND_DEBUG + n = +#else + (void) +#endif + rbtree_delete(&ref->s->super_set, &lookup); + log_assert(n != NULL); /* must have been present */ + if(!ref->s->reply_list && !ref->s->cb_list + && ref->s->super_set.count == 0) { + mesh->num_detached_states++; + log_assert(mesh->num_detached_states + + mesh->num_reply_states <= mesh->all.count); + } + } + rbtree_init(&qstate->mesh_info->sub_set, &mesh_state_ref_compare); +} + +int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo, + uint16_t qflags, int prime, struct module_qstate** newq) +{ + /* find it, if not, create it */ + struct mesh_area* mesh = qstate->env->mesh; + struct mesh_state* sub = mesh_area_find(mesh, qinfo, qflags, prime); + int was_detached; + if(mesh_detect_cycle_found(qstate, sub)) { + verbose(VERB_ALGO, "attach failed, cycle detected"); + return 0; + } + if(!sub) { +#ifdef UNBOUND_DEBUG + struct rbnode_t* n; +#endif + /* create a new one */ + sub = mesh_state_create(qstate->env, qinfo, qflags, prime); + if(!sub) { + log_err("mesh_attach_sub: out of memory"); + return 0; + } +#ifdef UNBOUND_DEBUG + n = +#else + (void) +#endif + rbtree_insert(&mesh->all, &sub->node); + log_assert(n != NULL); + /* set detached (it is now) */ + mesh->num_detached_states++; + /* set new query state to run */ +#ifdef UNBOUND_DEBUG + n = +#else + (void) +#endif + rbtree_insert(&mesh->run, &sub->run_node); + log_assert(n != NULL); + *newq = &sub->s; + } else + *newq = NULL; + was_detached = (sub->super_set.count == 0); + if(!mesh_state_attachment(qstate->mesh_info, sub)) + return 0; + /* if it was a duplicate attachment, the count was not zero before */ + if(!sub->reply_list && !sub->cb_list && was_detached && + sub->super_set.count == 1) { + /* it used to be detached, before this one got added */ + log_assert(mesh->num_detached_states > 0); + 
mesh->num_detached_states--; + } + /* *newq will be run when inited after the current module stops */ + return 1; +} + +int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub) +{ +#ifdef UNBOUND_DEBUG + struct rbnode_t* n; +#endif + struct mesh_state_ref* subref; /* points to sub, inserted in super */ + struct mesh_state_ref* superref; /* points to super, inserted in sub */ + if( !(subref = regional_alloc(super->s.region, + sizeof(struct mesh_state_ref))) || + !(superref = regional_alloc(sub->s.region, + sizeof(struct mesh_state_ref))) ) { + log_err("mesh_state_attachment: out of memory"); + return 0; + } + superref->node.key = superref; + superref->s = super; + subref->node.key = subref; + subref->s = sub; + if(!rbtree_insert(&sub->super_set, &superref->node)) { + /* this should not happen, iterator and validator do not + * attach subqueries that are identical. */ + /* already attached, we are done, nothing todo. + * since superref and subref already allocated in region, + * we cannot free them */ + return 1; + } +#ifdef UNBOUND_DEBUG + n = +#else + (void) +#endif + rbtree_insert(&super->sub_set, &subref->node); + log_assert(n != NULL); /* we checked above if statement, the reverse + administration should not fail now, unless they are out of sync */ + return 1; +} + +/** + * callback results to mesh cb entry + * @param m: mesh state to send it for. + * @param rcode: if not 0, error code. + * @param rep: reply to send (or NULL if rcode is set). 
+ * @param r: callback entry + */ +static void +mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep, + struct mesh_cb* r) +{ + int secure; + char* reason = NULL; + /* bogus messages are not made into servfail, sec_status passed + * to the callback function */ + if(rep && rep->security == sec_status_secure) + secure = 1; + else secure = 0; + if(!rep && rcode == LDNS_RCODE_NOERROR) + rcode = LDNS_RCODE_SERVFAIL; + if(!rcode && rep->security == sec_status_bogus) { + if(!(reason = errinf_to_str(&m->s))) + rcode = LDNS_RCODE_SERVFAIL; + } + /* send the reply */ + if(rcode) { + fptr_ok(fptr_whitelist_mesh_cb(r->cb)); + (*r->cb)(r->cb_arg, rcode, r->buf, sec_status_unchecked, NULL); + } else { + size_t udp_size = r->edns.udp_size; + sldns_buffer_clear(r->buf); + r->edns.edns_version = EDNS_ADVERTISED_VERSION; + r->edns.udp_size = EDNS_ADVERTISED_SIZE; + r->edns.ext_rcode = 0; + r->edns.bits &= EDNS_DO; + if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid, + r->qflags, r->buf, 0, 1, + m->s.env->scratch, udp_size, &r->edns, + (int)(r->edns.bits & EDNS_DO), secure)) + { + fptr_ok(fptr_whitelist_mesh_cb(r->cb)); + (*r->cb)(r->cb_arg, LDNS_RCODE_SERVFAIL, r->buf, + sec_status_unchecked, NULL); + } else { + fptr_ok(fptr_whitelist_mesh_cb(r->cb)); + (*r->cb)(r->cb_arg, LDNS_RCODE_NOERROR, r->buf, + rep->security, reason); + } + } + free(reason); + m->s.env->mesh->num_reply_addrs--; +} + +/** + * Send reply to mesh reply entry + * @param m: mesh state to send it for. + * @param rcode: if not 0, error code. + * @param rep: reply to send (or NULL if rcode is set). + * @param r: reply entry + * @param prev: previous reply, already has its answer encoded in buffer. 
+ */ +static void +mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep, + struct mesh_reply* r, struct mesh_reply* prev) +{ + struct timeval end_time; + struct timeval duration; + int secure; + /* examine security status */ + if(m->s.env->need_to_validate && (!(r->qflags&BIT_CD) || + m->s.env->cfg->ignore_cd) && rep && + rep->security <= sec_status_bogus) { + rcode = LDNS_RCODE_SERVFAIL; + if(m->s.env->cfg->stat_extended) + m->s.env->mesh->ans_bogus++; + } + if(rep && rep->security == sec_status_secure) + secure = 1; + else secure = 0; + if(!rep && rcode == LDNS_RCODE_NOERROR) + rcode = LDNS_RCODE_SERVFAIL; + /* send the reply */ + if(prev && prev->qflags == r->qflags && + prev->edns.edns_present == r->edns.edns_present && + prev->edns.bits == r->edns.bits && + prev->edns.udp_size == r->edns.udp_size) { + /* if the previous reply is identical to this one, fix ID */ + if(prev->query_reply.c->buffer != r->query_reply.c->buffer) + sldns_buffer_copy(r->query_reply.c->buffer, + prev->query_reply.c->buffer); + sldns_buffer_write_at(r->query_reply.c->buffer, 0, + &r->qid, sizeof(uint16_t)); + sldns_buffer_write_at(r->query_reply.c->buffer, 12, + r->qname, m->s.qinfo.qname_len); + comm_point_send_reply(&r->query_reply); + } else if(rcode) { + m->s.qinfo.qname = r->qname; + error_encode(r->query_reply.c->buffer, rcode, &m->s.qinfo, + r->qid, r->qflags, &r->edns); + comm_point_send_reply(&r->query_reply); + } else { + size_t udp_size = r->edns.udp_size; + r->edns.edns_version = EDNS_ADVERTISED_VERSION; + r->edns.udp_size = EDNS_ADVERTISED_SIZE; + r->edns.ext_rcode = 0; + r->edns.bits &= EDNS_DO; + m->s.qinfo.qname = r->qname; + if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid, + r->qflags, r->query_reply.c->buffer, 0, 1, + m->s.env->scratch, udp_size, &r->edns, + (int)(r->edns.bits & EDNS_DO), secure)) + { + error_encode(r->query_reply.c->buffer, + LDNS_RCODE_SERVFAIL, &m->s.qinfo, r->qid, + r->qflags, &r->edns); + } + 
comm_point_send_reply(&r->query_reply); + } + /* account */ + m->s.env->mesh->num_reply_addrs--; + end_time = *m->s.env->now_tv; + timeval_subtract(&duration, &end_time, &r->start_time); + verbose(VERB_ALGO, "query took " ARG_LL "d.%6.6d sec", + (long long)duration.tv_sec, (int)duration.tv_usec); + m->s.env->mesh->replies_sent++; + timeval_add(&m->s.env->mesh->replies_sum_wait, &duration); + timehist_insert(m->s.env->mesh->histogram, &duration); + if(m->s.env->cfg->stat_extended) { + uint16_t rc = FLAGS_GET_RCODE(sldns_buffer_read_u16_at(r-> + query_reply.c->buffer, 2)); + if(secure) m->s.env->mesh->ans_secure++; + m->s.env->mesh->ans_rcode[ rc ] ++; + if(rc == 0 && LDNS_ANCOUNT(sldns_buffer_begin(r-> + query_reply.c->buffer)) == 0) + m->s.env->mesh->ans_nodata++; + } +} + +void mesh_query_done(struct mesh_state* mstate) +{ + struct mesh_reply* r; + struct mesh_reply* prev = NULL; + struct mesh_cb* c; + struct reply_info* rep = (mstate->s.return_msg? + mstate->s.return_msg->rep:NULL); + for(r = mstate->reply_list; r; r = r->next) { + mesh_send_reply(mstate, mstate->s.return_rcode, rep, r, prev); + prev = r; + } + mstate->replies_sent = 1; + for(c = mstate->cb_list; c; c = c->next) { + mesh_do_callback(mstate, mstate->s.return_rcode, rep, c); + } +} + +void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate) +{ + struct mesh_state_ref* ref; + RBTREE_FOR(ref, struct mesh_state_ref*, &mstate->super_set) + { + /* make super runnable */ + (void)rbtree_insert(&mesh->run, &ref->s->run_node); + /* callback the function to inform super of result */ + fptr_ok(fptr_whitelist_mod_inform_super( + mesh->mods.mod[ref->s->s.curmod]->inform_super)); + (*mesh->mods.mod[ref->s->s.curmod]->inform_super)(&mstate->s, + ref->s->s.curmod, &ref->s->s); + } +} + +struct mesh_state* mesh_area_find(struct mesh_area* mesh, + struct query_info* qinfo, uint16_t qflags, int prime) +{ + struct mesh_state key; + struct mesh_state* result; + + key.node.key = &key; + key.s.is_priming 
= prime; + key.s.qinfo = *qinfo; + key.s.query_flags = qflags; + + result = (struct mesh_state*)rbtree_search(&mesh->all, &key); + return result; +} + +int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns, + sldns_buffer* buf, mesh_cb_func_t cb, void* cb_arg, + uint16_t qid, uint16_t qflags) +{ + struct mesh_cb* r = regional_alloc(s->s.region, + sizeof(struct mesh_cb)); + if(!r) + return 0; + r->buf = buf; + log_assert(fptr_whitelist_mesh_cb(cb)); /* early failure ifmissing*/ + r->cb = cb; + r->cb_arg = cb_arg; + r->edns = *edns; + r->qid = qid; + r->qflags = qflags; + r->next = s->cb_list; + s->cb_list = r; + return 1; + +} + +int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns, + struct comm_reply* rep, uint16_t qid, uint16_t qflags, uint8_t* qname) +{ + struct mesh_reply* r = regional_alloc(s->s.region, + sizeof(struct mesh_reply)); + if(!r) + return 0; + r->query_reply = *rep; + r->edns = *edns; + r->qid = qid; + r->qflags = qflags; + r->start_time = *s->s.env->now_tv; + r->next = s->reply_list; + r->qname = regional_alloc_init(s->s.region, qname, + s->s.qinfo.qname_len); + if(!r->qname) + return 0; + s->reply_list = r; + return 1; + +} + +/** + * Continue processing the mesh state at another module. + * Handles module to modules tranfer of control. + * Handles module finished. + * @param mesh: the mesh area. + * @param mstate: currently active mesh state. + * Deleted if finished, calls _done and _supers to + * send replies to clients and inform other mesh states. + * This in turn may create additional runnable mesh states. + * @param s: state at which the current module exited. + * @param ev: the event sent to the module. + * returned is the event to send to the next module. + * @return true if continue processing at the new module. + * false if not continued processing is needed. 
+ */ +static int +mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate, + enum module_ext_state s, enum module_ev* ev) +{ + mstate->num_activated++; + if(mstate->num_activated > MESH_MAX_ACTIVATION) { + /* module is looping. Stop it. */ + log_err("internal error: looping module stopped"); + log_query_info(VERB_QUERY, "pass error for qstate", + &mstate->s.qinfo); + s = module_error; + } + if(s == module_wait_module || s == module_restart_next) { + /* start next module */ + mstate->s.curmod++; + if(mesh->mods.num == mstate->s.curmod) { + log_err("Cannot pass to next module; at last module"); + log_query_info(VERB_QUERY, "pass error for qstate", + &mstate->s.qinfo); + mstate->s.curmod--; + return mesh_continue(mesh, mstate, module_error, ev); + } + if(s == module_restart_next) { + fptr_ok(fptr_whitelist_mod_clear( + mesh->mods.mod[mstate->s.curmod]->clear)); + (*mesh->mods.mod[mstate->s.curmod]->clear) + (&mstate->s, mstate->s.curmod); + mstate->s.minfo[mstate->s.curmod] = NULL; + } + *ev = module_event_pass; + return 1; + } + if(s == module_error && mstate->s.return_rcode == LDNS_RCODE_NOERROR) { + /* error is bad, handle pass back up below */ + mstate->s.return_rcode = LDNS_RCODE_SERVFAIL; + } + if(s == module_error || s == module_finished) { + if(mstate->s.curmod == 0) { + mesh_query_done(mstate); + mesh_walk_supers(mesh, mstate); + mesh_state_delete(&mstate->s); + return 0; + } + /* pass along the locus of control */ + mstate->s.curmod --; + *ev = module_event_moddone; + return 1; + } + return 0; +} + +void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate, + enum module_ev ev, struct outbound_entry* e) +{ + enum module_ext_state s; + verbose(VERB_ALGO, "mesh_run: start"); + while(mstate) { + /* run the module */ + fptr_ok(fptr_whitelist_mod_operate( + mesh->mods.mod[mstate->s.curmod]->operate)); + (*mesh->mods.mod[mstate->s.curmod]->operate) + (&mstate->s, ev, mstate->s.curmod, e); + + /* examine results */ + mstate->s.reply = NULL; + 
regional_free_all(mstate->s.env->scratch); + s = mstate->s.ext_state[mstate->s.curmod]; + verbose(VERB_ALGO, "mesh_run: %s module exit state is %s", + mesh->mods.mod[mstate->s.curmod]->name, strextstate(s)); + e = NULL; + if(mesh_continue(mesh, mstate, s, &ev)) + continue; + + /* run more modules */ + ev = module_event_pass; + if(mesh->run.count > 0) { + /* pop random element off the runnable tree */ + mstate = (struct mesh_state*)mesh->run.root->key; + (void)rbtree_delete(&mesh->run, mstate); + } else mstate = NULL; + } + if(verbosity >= VERB_ALGO) { + mesh_stats(mesh, "mesh_run: end"); + mesh_log_list(mesh); + } +} + +void +mesh_log_list(struct mesh_area* mesh) +{ + char buf[30]; + struct mesh_state* m; + int num = 0; + RBTREE_FOR(m, struct mesh_state*, &mesh->all) { + snprintf(buf, sizeof(buf), "%d%s%s%s%s%s mod%d %s%s", + num++, (m->s.is_priming)?"p":"", /* prime */ + (m->s.query_flags&BIT_RD)?"RD":"", + (m->s.query_flags&BIT_CD)?"CD":"", + (m->super_set.count==0)?"d":"", /* detached */ + (m->sub_set.count!=0)?"c":"", /* children */ + m->s.curmod, (m->reply_list)?"rep":"", /*hasreply*/ + (m->cb_list)?"cb":"" /* callbacks */ + ); + log_query_info(VERB_ALGO, buf, &m->s.qinfo); + } +} + +void +mesh_stats(struct mesh_area* mesh, const char* str) +{ + verbose(VERB_DETAIL, "%s %u recursion states (%u with reply, " + "%u detached), %u waiting replies, %u recursion replies " + "sent, %d replies dropped, %d states jostled out", + str, (unsigned)mesh->all.count, + (unsigned)mesh->num_reply_states, + (unsigned)mesh->num_detached_states, + (unsigned)mesh->num_reply_addrs, + (unsigned)mesh->replies_sent, + (unsigned)mesh->stats_dropped, + (unsigned)mesh->stats_jostled); + if(mesh->replies_sent > 0) { + struct timeval avg; + timeval_divide(&avg, &mesh->replies_sum_wait, + mesh->replies_sent); + log_info("average recursion processing time " + ARG_LL "d.%6.6d sec", + (long long)avg.tv_sec, (int)avg.tv_usec); + log_info("histogram of recursion processing times"); + 
timehist_log(mesh->histogram, "recursions"); + } +} + +void +mesh_stats_clear(struct mesh_area* mesh) +{ + if(!mesh) + return; + mesh->replies_sent = 0; + mesh->replies_sum_wait.tv_sec = 0; + mesh->replies_sum_wait.tv_usec = 0; + mesh->stats_jostled = 0; + mesh->stats_dropped = 0; + timehist_clear(mesh->histogram); + mesh->ans_secure = 0; + mesh->ans_bogus = 0; + memset(&mesh->ans_rcode[0], 0, sizeof(size_t)*16); + mesh->ans_nodata = 0; +} + +size_t +mesh_get_mem(struct mesh_area* mesh) +{ + struct mesh_state* m; + size_t s = sizeof(*mesh) + sizeof(struct timehist) + + sizeof(struct th_buck)*mesh->histogram->num + + sizeof(sldns_buffer) + sldns_buffer_capacity(mesh->qbuf_bak); + RBTREE_FOR(m, struct mesh_state*, &mesh->all) { + /* all, including m itself allocated in qstate region */ + s += regional_get_mem(m->s.region); + } + return s; +} + +int +mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo, + uint16_t flags, int prime) +{ + struct mesh_area* mesh = qstate->env->mesh; + struct mesh_state* dep_m = mesh_area_find(mesh, qinfo, flags, prime); + return mesh_detect_cycle_found(qstate, dep_m); +} + +void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp, + struct mesh_state** lp) +{ + /* insert as last element */ + m->prev = *lp; + m->next = NULL; + if(*lp) + (*lp)->next = m; + else *fp = m; + *lp = m; +} + +void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp, + struct mesh_state** lp) +{ + if(m->next) + m->next->prev = m->prev; + else *lp = m->prev; + if(m->prev) + m->prev->next = m->next; + else *fp = m->next; +} diff --git a/external/unbound/services/mesh.h b/external/unbound/services/mesh.h new file mode 100644 index 000000000..fbfbbcb4a --- /dev/null +++ b/external/unbound/services/mesh.h @@ -0,0 +1,572 @@ +/* + * services/mesh.h - deal with mesh of query states and handle events for that. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains functions to assist in dealing with a mesh of + * query states. This mesh is supposed to be thread-specific. + * It consists of query states (per qname, qtype, qclass) and connections + * between query states and the super and subquery states, and replies to + * send back to clients. 
+ */ + +#ifndef SERVICES_MESH_H +#define SERVICES_MESH_H + +#include "util/rbtree.h" +#include "util/netevent.h" +#include "util/data/msgparse.h" +#include "util/module.h" +#include "services/modstack.h" +struct sldns_buffer; +struct mesh_state; +struct mesh_reply; +struct mesh_cb; +struct query_info; +struct reply_info; +struct outbound_entry; +struct timehist; + +/** + * Maximum number of mesh state activations. Any more is likely an + * infinite loop in the module. It is then terminated. + */ +#define MESH_MAX_ACTIVATION 3000 + +/** + * Max number of references-to-references-to-references.. search size. + * Any more is treated like 'too large', and the creation of a new + * dependency is failed (so that no loops can be created). + */ +#define MESH_MAX_SUBSUB 1024 + +/** + * Mesh of query states + */ +struct mesh_area { + /** active module stack */ + struct module_stack mods; + /** environment for new states */ + struct module_env* env; + + /** set of runnable queries (mesh_state.run_node) */ + rbtree_t run; + /** rbtree of all current queries (mesh_state.node)*/ + rbtree_t all; + + /** count of the total number of mesh_reply entries */ + size_t num_reply_addrs; + /** count of the number of mesh_states that have mesh_replies + * Because a state can send results to multiple reply addresses, + * this number must be equal or lower than num_reply_addrs. 
*/ + size_t num_reply_states; + /** number of mesh_states that have no mesh_replies, and also + * an empty set of super-states, thus are 'toplevel' or detached + * internal opportunistic queries */ + size_t num_detached_states; + /** number of reply states in the forever list */ + size_t num_forever_states; + + /** max total number of reply states to have */ + size_t max_reply_states; + /** max forever number of reply states to have */ + size_t max_forever_states; + + /** stats, cumulative number of reply states jostled out */ + size_t stats_jostled; + /** stats, cumulative number of incoming client msgs dropped */ + size_t stats_dropped; + /** number of replies sent */ + size_t replies_sent; + /** sum of waiting times for the replies */ + struct timeval replies_sum_wait; + /** histogram of time values */ + struct timehist* histogram; + /** (extended stats) secure replies */ + size_t ans_secure; + /** (extended stats) bogus replies */ + size_t ans_bogus; + /** (extended stats) rcodes in replies */ + size_t ans_rcode[16]; + /** (extended stats) rcode nodata in replies */ + size_t ans_nodata; + + /** backup of query if other operations recurse and need the + * network buffers */ + struct sldns_buffer* qbuf_bak; + + /** double linked list of the run-to-completion query states. + * These are query states with a reply */ + struct mesh_state* forever_first; + /** last entry in run forever list */ + struct mesh_state* forever_last; + + /** double linked list of the query states that can be jostled out + * by new queries if too old. These are query states with a reply */ + struct mesh_state* jostle_first; + /** last entry in jostle list - this is the entry that is newest */ + struct mesh_state* jostle_last; + /** timeout for jostling. if age is lower, it does not get jostled. */ + struct timeval jostle_max; +}; + +/** + * A mesh query state + * Unique per qname, qtype, qclass (from the qstate). + * And RD / CD flag; in case a client turns it off. 
+ * And priming queries are different from ordinary queries (because of hints). + * + * The entire structure is allocated in a region, this region is the qstate + * region. All parts (rbtree nodes etc) are also allocated in the region. + */ +struct mesh_state { + /** node in mesh_area all tree, key is this struct. Must be first. */ + rbnode_t node; + /** node in mesh_area runnable tree, key is this struct */ + rbnode_t run_node; + /** the query state. Note that the qinfo and query_flags + * may not change. */ + struct module_qstate s; + /** the list of replies to clients for the results */ + struct mesh_reply* reply_list; + /** the list of callbacks for the results */ + struct mesh_cb* cb_list; + /** set of superstates (that want this state's result) + * contains struct mesh_state_ref* */ + rbtree_t super_set; + /** set of substates (that this state needs to continue) + * contains struct mesh_state_ref* */ + rbtree_t sub_set; + /** number of activations for the mesh state */ + size_t num_activated; + + /** previous in linked list for reply states */ + struct mesh_state* prev; + /** next in linked list for reply states */ + struct mesh_state* next; + /** if this state is in the forever list, jostle list, or neither */ + enum mesh_list_select { mesh_no_list, mesh_forever_list, + mesh_jostle_list } list_select; + + /** true if replies have been sent out (at end for alignment) */ + uint8_t replies_sent; +}; + +/** + * Rbtree reference to a mesh_state. + * Used in super_set and sub_set. + */ +struct mesh_state_ref { + /** node in rbtree for set, key is this structure */ + rbnode_t node; + /** the mesh state */ + struct mesh_state* s; +}; + +/** + * Reply to a client + */ +struct mesh_reply { + /** next in reply list */ + struct mesh_reply* next; + /** the query reply destination, packet buffer and where to send. 
*/ + struct comm_reply query_reply; + /** edns data from query */ + struct edns_data edns; + /** the time when request was entered */ + struct timeval start_time; + /** id of query, in network byteorder. */ + uint16_t qid; + /** flags of query, for reply flags */ + uint16_t qflags; + /** qname from this query. len same as mesh qinfo. */ + uint8_t* qname; +}; + +/** + * Mesh result callback func. + * called as func(cb_arg, rcode, buffer_with_reply, security, why_bogus); + */ +typedef void (*mesh_cb_func_t)(void*, int, struct sldns_buffer*, enum sec_status, + char*); + +/** + * Callback to result routine + */ +struct mesh_cb { + /** next in list */ + struct mesh_cb* next; + /** edns data from query */ + struct edns_data edns; + /** id of query, in network byteorder. */ + uint16_t qid; + /** flags of query, for reply flags */ + uint16_t qflags; + /** buffer for reply */ + struct sldns_buffer* buf; + + /** callback routine for results. if rcode != 0 buf has message. + * called as cb(cb_arg, rcode, buf, sec_state); + */ + mesh_cb_func_t cb; + /** user arg for callback */ + void* cb_arg; +}; + +/* ------------------- Functions for worker -------------------- */ + +/** + * Allocate mesh, to empty. + * @param stack: module stack to activate, copied (as readonly reference). + * @param env: environment for new queries. + * @return mesh: the new mesh or NULL on error. + */ +struct mesh_area* mesh_create(struct module_stack* stack, + struct module_env* env); + +/** + * Delete mesh, and all query states and replies in it. + * @param mesh: the mesh to delete. + */ +void mesh_delete(struct mesh_area* mesh); + +/** + * New query incoming from clients. Create new query state if needed, and + * add mesh_reply to it. Returns error to client on malloc failures. + * Will run the mesh area queries to process if a new query state is created. + * + * @param mesh: the mesh. + * @param qinfo: query from client. + * @param qflags: flags from client query. 
+ * @param edns: edns data from client query. + * @param rep: where to reply to. + * @param qid: query id to reply with. + */ +void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, + uint16_t qflags, struct edns_data* edns, struct comm_reply* rep, + uint16_t qid); + +/** + * New query with callback. Create new query state if needed, and + * add mesh_cb to it. + * Will run the mesh area queries to process if a new query state is created. + * + * @param mesh: the mesh. + * @param qinfo: query from client. + * @param qflags: flags from client query. + * @param edns: edns data from client query. + * @param buf: buffer for reply contents. + * @param qid: query id to reply with. + * @param cb: callback function. + * @param cb_arg: callback user arg. + * @return 0 on error. + */ +int mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, + uint16_t qflags, struct edns_data* edns, struct sldns_buffer* buf, + uint16_t qid, mesh_cb_func_t cb, void* cb_arg); + +/** + * New prefetch message. Create new query state if needed. + * Will run the mesh area queries to process if a new query state is created. + * + * @param mesh: the mesh. + * @param qinfo: query from client. + * @param qflags: flags from client query. + * @param leeway: TTL leeway what to expire earlier for this update. + */ +void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo, + uint16_t qflags, time_t leeway); + +/** + * Handle new event from the wire. A serviced query has returned. + * The query state will be made runnable, and the mesh_area will process + * query states until processing is complete. + * + * @param mesh: the query mesh. + * @param e: outbound entry, with query state to run and reply pointer. + * @param reply: the comm point reply info. + * @param what: NETEVENT_* error code (if not 0, what is wrong, TIMEOUT). 
+ */ +void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e, + struct comm_reply* reply, int what); + +/* ------------------- Functions for module environment --------------- */ + +/** + * Detach-subqueries. + * Remove all sub-query references from this query state. + * Keeps super-references of those sub-queries correct. + * Updates stat items in mesh_area structure. + * @param qstate: used to find mesh state. + */ +void mesh_detach_subs(struct module_qstate* qstate); + +/** + * Attach subquery. + * Creates it if it does not exist already. + * Keeps sub and super references correct. + * Performs a cycle detection - for double check - and fails if there is one. + * Also fails if the sub-sub-references become too large. + * Updates stat items in mesh_area structure. + * Pass if it is priming query or not. + * return: + * o if error (malloc) happened. + * o need to initialise the new state (module init; it is a new state). + * so that the next run of the query with this module is successful. + * o no init needed, attachment successful. + * + * @param qstate: the state to find mesh state, and that wants to receive + * the results from the new subquery. + * @param qinfo: what to query for (copied). + * @param qflags: what flags to use (RD / CD flag or not). + * @param prime: if it is a (stub) priming query. + * @param newq: If the new subquery needs initialisation, it is returned, + * otherwise NULL is returned. + * @return: false on error, true if success (and init may be needed). + */ +int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo, + uint16_t qflags, int prime, struct module_qstate** newq); + +/** + * Query state is done, send messages to reply entries. + * Encode messages using reply entry values and the querystate (with original + * qinfo), using given reply_info. + * Pass errcode != 0 if an error reply is needed. + * If no reply entries, nothing is done. 
+ * Must be called before a module can module_finished or return module_error. + * The module must handle the super query states itself as well. + * + * @param mstate: mesh state that is done. return_rcode and return_msg + * are used for replies. + * return_rcode: if not 0 (NOERROR) an error is sent back (and + * return_msg is ignored). + * return_msg: reply to encode and send back to clients. + */ +void mesh_query_done(struct mesh_state* mstate); + +/** + * Call inform_super for the super query states that are interested in the + * results from this query state. These can then be changed for error + * or results. + * Called when a module is module_finished or returns module_error. + * The super query states become runnable with event module_event_pass, + * it calls the current module for the super with the inform_super event. + * + * @param mesh: mesh area to add newly runnable modules to. + * @param mstate: the state that has results, used to find mesh state. + */ +void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate); + +/** + * Delete mesh state, cleanup and also rbtrees and so on. + * Will detach from all super/subnodes. + * @param qstate: to remove. + */ +void mesh_state_delete(struct module_qstate* qstate); + +/* ------------------- Functions for mesh -------------------- */ + +/** + * Create and initialize a new mesh state and its query state + * Does not put the mesh state into rbtrees and so on. + * @param env: module environment to set. + * @param qinfo: query info that the mesh is for. + * @param qflags: flags for query (RD / CD flag). + * @param prime: if true, it is a priming query, set is_priming on mesh state. + * @return: new mesh state or NULL on allocation error. + */ +struct mesh_state* mesh_state_create(struct module_env* env, + struct query_info* qinfo, uint16_t qflags, int prime); + +/** + * Cleanup a mesh state and its query state. Does not do rbtree or + * reference cleanup. + * @param mstate: mesh state to cleanup. 
Its pointer may no longer be used + * afterwards. Cleanup rbtrees before calling this function. + */ +void mesh_state_cleanup(struct mesh_state* mstate); + +/** + * Delete all mesh states from the mesh. + * @param mesh: the mesh area to clear + */ +void mesh_delete_all(struct mesh_area* mesh); + +/** + * Find a mesh state in the mesh area. Pass relevant flags. + * + * @param mesh: the mesh area to look in. + * @param qinfo: what query + * @param qflags: if RD / CD bit is set or not. + * @param prime: if it is a priming query. + * @return: mesh state or NULL if not found. + */ +struct mesh_state* mesh_area_find(struct mesh_area* mesh, + struct query_info* qinfo, uint16_t qflags, int prime); + +/** + * Setup attachment super/sub relation between super and sub mesh state. + * The relation must not be present when calling the function. + * Does not update stat items in mesh_area. + * @param super: super state. + * @param sub: sub state. + * @return: 0 on alloc error. + */ +int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub); + +/** + * Create new reply structure and attach it to a mesh state. + * Does not update stat items in mesh area. + * @param s: the mesh state. + * @param edns: edns data for reply (bufsize). + * @param rep: comm point reply info. + * @param qid: ID of reply. + * @param qflags: original query flags. + * @param qname: original query name. + * @return: 0 on alloc error. + */ +int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns, + struct comm_reply* rep, uint16_t qid, uint16_t qflags, uint8_t* qname); + +/** + * Create new callback structure and attach it to a mesh state. + * Does not update stat items in mesh area. + * @param s: the mesh state. + * @param edns: edns data for reply (bufsize). + * @param buf: buffer for reply + * @param cb: callback to call with results. + * @param cb_arg: callback user arg. + * @param qid: ID of reply. + * @param qflags: original query flags. + * @return: 0 on alloc error. 
+ */ +int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns, + struct sldns_buffer* buf, mesh_cb_func_t cb, void* cb_arg, uint16_t qid, + uint16_t qflags); + +/** + * Run the mesh. Run all runnable mesh states. Which can create new + * runnable mesh states. Until completion. Automatically called by + * mesh_report_reply and mesh_new_client as needed. + * @param mesh: mesh area. + * @param mstate: first mesh state to run. + * @param ev: event the mstate. Others get event_pass. + * @param e: if a reply, its outbound entry. + */ +void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate, + enum module_ev ev, struct outbound_entry* e); + +/** + * Print some stats about the mesh to the log. + * @param mesh: the mesh to print it for. + * @param str: descriptive string to go with it. + */ +void mesh_stats(struct mesh_area* mesh, const char* str); + +/** + * Clear the stats that the mesh keeps (number of queries serviced) + * @param mesh: the mesh + */ +void mesh_stats_clear(struct mesh_area* mesh); + +/** + * Print all the states in the mesh to the log. + * @param mesh: the mesh to print all states of. + */ +void mesh_log_list(struct mesh_area* mesh); + +/** + * Calculate memory size in use by mesh and all queries inside it. + * @param mesh: the mesh to examine. + * @return size in bytes. + */ +size_t mesh_get_mem(struct mesh_area* mesh); + +/** + * Find cycle; see if the given mesh is in the targets sub, or sub-sub, ... + * trees. + * If the sub-sub structure is too large, it returns 'a cycle'=2. + * @param qstate: given mesh querystate. + * @param qinfo: query info for dependency. + * @param flags: query flags of dependency. + * @param prime: if dependency is a priming query or not. + * @return true if the name,type,class exists and the given qstate mesh exists + * as a dependency of that name. Thus if qstate becomes dependent on + * name,type,class then a cycle is created, this is return value 1. + * Too large to search is value 2 (also true). 
+ */ +int mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo, + uint16_t flags, int prime); + +/** compare two mesh_states */ +int mesh_state_compare(const void* ap, const void* bp); + +/** compare two mesh references */ +int mesh_state_ref_compare(const void* ap, const void* bp); + +/** + * Make space for another recursion state for a reply in the mesh + * @param mesh: mesh area + * @param qbuf: query buffer to save if recursion is invoked to make space. + * This buffer is necessary, because the following sequence in calls + * can result in an overwrite of the incoming query: + * delete_other_mesh_query - iter_clean - serviced_delete - waiting + * udp query is sent - on error callback - callback sends SERVFAIL reply + * over the same network channel, and shared UDP buffer is overwritten. + * You can pass NULL if there is no buffer that must be backed up. + * @return false if no space is available. + */ +int mesh_make_new_space(struct mesh_area* mesh, struct sldns_buffer* qbuf); + +/** + * Insert mesh state into a double linked list. Inserted at end. + * @param m: mesh state. + * @param fp: pointer to the first-elem-pointer of the list. + * @param lp: pointer to the last-elem-pointer of the list. + */ +void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp, + struct mesh_state** lp); + +/** + * Remove mesh state from a double linked list. Remove from any position. + * @param m: mesh state. + * @param fp: pointer to the first-elem-pointer of the list. + * @param lp: pointer to the last-elem-pointer of the list. 
+ */ +void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp, + struct mesh_state** lp); + +#endif /* SERVICES_MESH_H */ diff --git a/external/unbound/services/modstack.c b/external/unbound/services/modstack.c new file mode 100644 index 000000000..a99030bc3 --- /dev/null +++ b/external/unbound/services/modstack.c @@ -0,0 +1,215 @@ +/* + * services/modstack.c - stack of modules + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains functions to help maintain a stack of modules. + */ +#include "config.h" +#include <ctype.h> +#include "services/modstack.h" +#include "util/module.h" +#include "util/fptr_wlist.h" +#include "dns64/dns64.h" +#include "iterator/iterator.h" +#include "validator/validator.h" + +#ifdef WITH_PYTHONMODULE +#include "pythonmod/pythonmod.h" +#endif + +/** count number of modules (words) in the string */ +static int +count_modules(const char* s) +{ + int num = 0; + if(!s) + return 0; + while(*s) { + /* skip whitespace */ + while(*s && isspace((int)*s)) + s++; + if(*s && !isspace((int)*s)) { + /* skip identifier */ + num++; + while(*s && !isspace((int)*s)) + s++; + } + } + return num; +} + +void +modstack_init(struct module_stack* stack) +{ + stack->num = 0; + stack->mod = NULL; +} + +int +modstack_config(struct module_stack* stack, const char* module_conf) +{ + int i; + verbose(VERB_QUERY, "module config: \"%s\"", module_conf); + stack->num = count_modules(module_conf); + if(stack->num == 0) { + log_err("error: no modules specified"); + return 0; + } + if(stack->num > MAX_MODULE) { + log_err("error: too many modules (%d max %d)", + stack->num, MAX_MODULE); + return 0; + } + stack->mod = (struct module_func_block**)calloc((size_t) + stack->num, sizeof(struct module_func_block*)); + if(!stack->mod) { + log_err("out of memory"); + return 0; + } + for(i=0; i<stack->num; i++) { + stack->mod[i] = 
module_factory(&module_conf); + if(!stack->mod[i]) { + log_err("Unknown value for next module: '%s'", + module_conf); + return 0; + } + } + return 1; +} + +/** The list of module names */ +const char** +module_list_avail(void) +{ + /* these are the modules available */ + static const char* names[] = { + "dns64", +#ifdef WITH_PYTHONMODULE + "python", +#endif + "validator", + "iterator", + NULL}; + return names; +} + +/** func block get function type */ +typedef struct module_func_block* (*fbgetfunctype)(void); + +/** The list of module func blocks */ +static fbgetfunctype* +module_funcs_avail(void) +{ + static struct module_func_block* (*fb[])(void) = { + &dns64_get_funcblock, +#ifdef WITH_PYTHONMODULE + &pythonmod_get_funcblock, +#endif + &val_get_funcblock, + &iter_get_funcblock, + NULL}; + return fb; +} + +struct +module_func_block* module_factory(const char** str) +{ + int i = 0; + const char* s = *str; + const char** names = module_list_avail(); + fbgetfunctype* fb = module_funcs_avail(); + while(*s && isspace((int)*s)) + s++; + while(names[i]) { + if(strncmp(names[i], s, strlen(names[i])) == 0) { + s += strlen(names[i]); + *str = s; + return (*fb[i])(); + } + i++; + } + return NULL; +} + +int +modstack_setup(struct module_stack* stack, const char* module_conf, + struct module_env* env) +{ + int i; + if(stack->num != 0) + modstack_desetup(stack, env); + /* fixed setup of the modules */ + if(!modstack_config(stack, module_conf)) { + return 0; + } + env->need_to_validate = 0; /* set by module init below */ + for(i=0; i<stack->num; i++) { + verbose(VERB_OPS, "init module %d: %s", + i, stack->mod[i]->name); + fptr_ok(fptr_whitelist_mod_init(stack->mod[i]->init)); + if(!(*stack->mod[i]->init)(env, i)) { + log_err("module init for module %s failed", + stack->mod[i]->name); + return 0; + } + } + return 1; +} + +void +modstack_desetup(struct module_stack* stack, struct module_env* env) +{ + int i; + for(i=0; i<stack->num; i++) { + 
fptr_ok(fptr_whitelist_mod_deinit(stack->mod[i]->deinit)); + (*stack->mod[i]->deinit)(env, i); + } + stack->num = 0; + free(stack->mod); + stack->mod = NULL; +} + +int +modstack_find(struct module_stack* stack, const char* name) +{ + int i; + for(i=0; i<stack->num; i++) { + if(strcmp(stack->mod[i]->name, name) == 0) + return i; + } + return -1; +} diff --git a/external/unbound/services/modstack.h b/external/unbound/services/modstack.h new file mode 100644 index 000000000..cb8613299 --- /dev/null +++ b/external/unbound/services/modstack.h @@ -0,0 +1,113 @@ +/* + * services/modstack.h - stack of modules + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains functions to help maintain a stack of modules. + */ + +#ifndef SERVICES_MODSTACK_H +#define SERVICES_MODSTACK_H +struct module_func_block; +struct module_env; + +/** + * Stack of modules. The mod array holds num function block pointers. + */ +struct module_stack { + /** the number of modules, 0 for an empty stack */ + int num; + /** the module callbacks, array of num entries; the function blocks are + * references only, modstack_desetup frees just the array */ + struct module_func_block** mod; +}; + +/** + * Init a stack of modules + * @param stack: initialised as empty (num is 0, mod is NULL). + */ +void modstack_init(struct module_stack* stack); + +/** + * Read config file module settings and set up the modfunc block + * @param stack: the stack of modules (empty before call). + * @param module_conf: string that lists the modules to insert, the names + * are separated by whitespace. + * @return false on error: no modules given, more than MAX_MODULE modules, + * allocation failure, or an unknown module name. + */ +int modstack_config(struct module_stack* stack, const char* module_conf); + +/** + * Get funcblock for module name + * @param str: string with module name. Advanced to next value on success. + * The string is assumed whitespace separated list of module names. + * @return funcblock or NULL on error. + */ +struct module_func_block* module_factory(const char** str); + +/** + * Get list of modules available. + * @return list of modules available. Static strings, ends with NULL. + */ +const char** module_list_avail(void); + +/** + * Setup modules. Assigns ids and calls module_init. + * @param stack: if not empty beforehand, it will be desetup()ed. + * It is then modstack_configged(). 
+ * @param module_conf: string that lists the modules to insert. + * @param env: module environment which is inited by the modules. + * environment should have a superalloc, cfg, + * env.need_to_validate is set by the modules. + * @return false if the module configuration could not be read or if a + * module init failed. + */ +int modstack_setup(struct module_stack* stack, const char* module_conf, + struct module_env* env); + +/** + * Desetup the modules, deinit, delete. + * @param stack: made empty. + * @param env: module env for module deinit() calls. + */ +void modstack_desetup(struct module_stack* stack, struct module_env* env); + +/** + * Find index of module by name. + * @param stack: to look in + * @param name: the name to look for, compared exactly with strcmp. + * @return -1 on failure, otherwise index number. + */ +int modstack_find(struct module_stack* stack, const char* name); + +#endif /* SERVICES_MODSTACK_H */ diff --git a/external/unbound/services/outbound_list.c b/external/unbound/services/outbound_list.c new file mode 100644 index 000000000..ad73380bc --- /dev/null +++ b/external/unbound/services/outbound_list.c @@ -0,0 +1,89 @@ +/* + * services/outbound_list.c - keep list of outbound serviced queries. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file contains functions to help a module keep track of the + * queries it has outstanding to authoritative servers. + */ +#include "config.h" +#include <sys/time.h> +#include "services/outbound_list.h" +#include "services/outside_network.h" + +/** + * Put the list in the empty state. + * @param list: the list structure to initialise. + */ +void +outbound_list_init(struct outbound_list* list) +{ + list->first = NULL; +} + +/** + * Stop the serviced query of every entry and make the list empty. + * @param list: the list to clear, the struct itself is kept. + */ +void +outbound_list_clear(struct outbound_list* list) +{ + struct outbound_entry* cur; + struct outbound_entry* following; + for(cur = list->first; cur != NULL; cur = following) { + /* pick up the successor before the query is stopped */ + following = cur->next; + outnet_serviced_query_stop(cur->qsent, cur); + /* in region, no free needed */ + } + /* same end state as outbound_list_init */ + list->first = NULL; +} + +/** + * Put an entry at the front of the list. + * @param list: the list to add to. + * @param e: the entry, its next and prev pointers are set here. + */ +void +outbound_list_insert(struct outbound_list* list, struct outbound_entry* e) +{ + struct outbound_entry* head = list->first; + if(head != NULL) + head->prev = e; + e->next = head; + e->prev = NULL; + list->first = e; +} + +/** + * Stop the serviced query of an entry and unlink it from the list. + * @param list: the list to remove from. + * @param e: the entry to remove, NULL is ignored. + */ +void +outbound_list_remove(struct outbound_list* list, struct outbound_entry* e) +{ + struct outbound_entry* before; + struct outbound_entry* after; + if(e == NULL) + return; + outnet_serviced_query_stop(e->qsent, e); + before = e->prev; + after = e->next; + if(after != NULL) + after->prev = before; + if(before == NULL) + list->first = after; + else + before->next = after; + /* in region, no free needed */ +} diff --git a/external/unbound/services/outbound_list.h 
b/external/unbound/services/outbound_list.h new file mode 100644 index 000000000..ad59e42d1 --- /dev/null +++ b/external/unbound/services/outbound_list.h @@ -0,0 +1,105 @@ +/* + * services/outbound_list.h - keep list of outbound serviced queries. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** + * \file + * + * This file contains functions to help a module keep track of the + * queries it has outstanding to authoritative servers. + */ +#ifndef SERVICES_OUTBOUND_LIST_H +#define SERVICES_OUTBOUND_LIST_H +struct outbound_entry; +struct serviced_query; +struct module_qstate; + +/** + * The outbound list. This structure is part of the module specific query + * state. + */ +struct outbound_list { + /** The linked list of outbound query entries; first entry, NULL if the + * list is empty. */ + struct outbound_entry* first; +}; + +/** + * Outbound list entry. A serviced query sent by a module processing the + * query from the qstate. Doubly linked list to aid removal. + */ +struct outbound_entry { + /** next in list, NULL for the last entry */ + struct outbound_entry* next; + /** prev in list, NULL for the first entry */ + struct outbound_entry* prev; + /** The query that was sent out */ + struct serviced_query* qsent; + /** the module query state that sent it */ + struct module_qstate* qstate; +}; + +/** + * Init the user allocated outbound list structure + * @param list: the list structure, it is made empty. + */ +void outbound_list_init(struct outbound_list* list); + +/** + * Clear the user owned outbound list structure. + * Deletes serviced queries. + * @param list: the list structure. It is cleared, but the list struct itself + * is the caller's responsibility to delete. + */ +void outbound_list_clear(struct outbound_list* list); + +/** + * Insert new entry into the list, at the front. + * Caller must allocate the entry with malloc. + * NOTE(review): outbound_list.c never frees entries and comments them as + * 'in region, no free needed'; confirm how entries are to be allocated. + * qstate and qsent are set by caller. + * @param list: the list to add to. + * @param e: entry to add, it is only half initialised at call start, fully + * initialised at call end (next and prev are set by this call). + */ +void outbound_list_insert(struct outbound_list* list, + struct outbound_entry* e); + +/** + * Remove an entry from the list, and deletes it. + * Deletes serviced query in the entry. + * @param list: the list to remove from. + * @param e: the entry to remove. 
+ */ +void outbound_list_remove(struct outbound_list* list, + struct outbound_entry* e); + +#endif /* SERVICES_OUTBOUND_LIST_H */ diff --git a/external/unbound/services/outside_network.c b/external/unbound/services/outside_network.c new file mode 100644 index 000000000..986c53dd8 --- /dev/null +++ b/external/unbound/services/outside_network.c @@ -0,0 +1,2052 @@ +/* + * services/outside_network.c - implement sending of queries and wait answer. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file has functions to send queries to authoritative servers and + * wait for the pending answer events. + */ +#include "config.h" +#include <ctype.h> +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#include <sys/time.h> +#include "services/outside_network.h" +#include "services/listen_dnsport.h" +#include "services/cache/infra.h" +#include "util/data/msgparse.h" +#include "util/data/msgreply.h" +#include "util/data/msgencode.h" +#include "util/data/dname.h" +#include "util/netevent.h" +#include "util/log.h" +#include "util/net_help.h" +#include "util/random.h" +#include "util/fptr_wlist.h" +#include "ldns/sbuffer.h" +#include "dnstap/dnstap.h" +#ifdef HAVE_OPENSSL_SSL_H +#include <openssl/ssl.h> +#endif + +#ifdef HAVE_NETDB_H +#include <netdb.h> +#endif +#include <fcntl.h> + +/** number of times to retry making a random ID that is unique. */ +#define MAX_ID_RETRY 1000 +/** number of times to retry finding interface, port that can be opened. 
*/ +#define MAX_PORT_RETRY 10000 +/** number of retries on outgoing UDP queries */ +#define OUTBOUND_UDP_RETRY 1 + +/** initiate TCP transaction for serviced query */ +static void serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff); +/** with a fd available, randomize and send UDP */ +static int randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, + int timeout); + +/** remove waiting tcp from the outnet waiting list */ +static void waiting_list_remove(struct outside_network* outnet, + struct waiting_tcp* w); +/** + * Compare two pending entries: by id first, for equal ids by address + * with sockaddr_cmp. + * @param key1: pointer to a struct pending. + * @param key2: pointer to a struct pending. + * @return -1 or 1 if the ids differ, otherwise the sockaddr_cmp result. + */ +int +pending_cmp(const void* key1, const void* key2) +{ + struct pending *p1 = (struct pending*)key1; + struct pending *p2 = (struct pending*)key2; + if(p1->id < p2->id) + return -1; + if(p1->id > p2->id) + return 1; + log_assert(p1->id == p2->id); + return sockaddr_cmp(&p1->addr, p1->addrlen, &p2->addr, p2->addrlen); +} +/** + * Compare two serviced queries. The order is decided by, in sequence: + * qbuflen, the first 10 bytes of qbuf, the last 4 bytes of qbuf, the + * dnssec value, the name at qbuf+10 and finally the address. + * @param key1: pointer to a struct serviced_query. + * @param key2: pointer to a struct serviced_query. + * @return negative, zero or positive for smaller, equal or larger. + */ +int +serviced_cmp(const void* key1, const void* key2) +{ + struct serviced_query* q1 = (struct serviced_query*)key1; + struct serviced_query* q2 = (struct serviced_query*)key2; + int r; + if(q1->qbuflen < q2->qbuflen) + return -1; + if(q1->qbuflen > q2->qbuflen) + return 1; + log_assert(q1->qbuflen == q2->qbuflen); + log_assert(q1->qbuflen >= 15 /* 10 header, root, type, class */); + /* alternate casing of qname is still the same query, so only the + * first 10 and the last 4 bytes are compared bytewise; the name is + * compared further down with query_dname_compare */ + if((r = memcmp(q1->qbuf, q2->qbuf, 10)) != 0) + return r; + if((r = memcmp(q1->qbuf+q1->qbuflen-4, q2->qbuf+q2->qbuflen-4, 4)) != 0) + return r; + if(q1->dnssec != q2->dnssec) { + if(q1->dnssec < q2->dnssec) + return -1; + return 1; + } + if((r = query_dname_compare(q1->qbuf+10, q2->qbuf+10)) != 0) + return r; + return sockaddr_cmp(&q1->addr, q1->addrlen, &q2->addr, q2->addrlen); +} + +/** delete waiting_tcp entry. Does not unlink from waiting list. + * Deletes the timer of the entry if it has one, then frees the entry. + * @param w: to delete. NULL is ignored.
+ */ +static void +waiting_tcp_delete(struct waiting_tcp* w) +{ + if(!w) return; + if(w->timer) + comm_timer_delete(w->timer); + free(w); +} + +/** + * Pick random outgoing-interface of that family, and bind it. + * port set to 0 so OS picks a port number for us. + * if it is the ANY address, do not bind. + * Note that the port is set to 0 in the address of the chosen interface + * itself, not in a copy. + * @param w: tcp structure with destination address. + * @param s: socket fd. + * @return false on error, socket closed. + */ +static int +pick_outgoing_tcp(struct waiting_tcp* w, int s) +{ + struct port_if* pi = NULL; + int num; +#ifdef INET6 + if(addr_is_ip6(&w->addr, w->addrlen)) + num = w->outnet->num_ip6; + else +#endif + num = w->outnet->num_ip4; + if(num == 0) { + log_err("no TCP outgoing interfaces of family"); + log_addr(VERB_OPS, "for addr", &w->addr, w->addrlen); +#ifndef USE_WINSOCK + close(s); +#else + closesocket(s); +#endif + return 0; + } +#ifdef INET6 + if(addr_is_ip6(&w->addr, w->addrlen)) + pi = &w->outnet->ip6_ifs[ub_random_max(w->outnet->rnd, num)]; + else +#endif + pi = &w->outnet->ip4_ifs[ub_random_max(w->outnet->rnd, num)]; + log_assert(pi); + if(addr_is_any(&pi->addr, pi->addrlen)) { + /* binding to the ANY interface is for listening sockets */ + return 1; + } + /* set port to 0 */ + if(addr_is_ip6(&pi->addr, pi->addrlen)) + ((struct sockaddr_in6*)&pi->addr)->sin6_port = 0; + else ((struct sockaddr_in*)&pi->addr)->sin_port = 0; + if(bind(s, (struct sockaddr*)&pi->addr, pi->addrlen) != 0) { +#ifndef USE_WINSOCK + log_err("outgoing tcp: bind: %s", strerror(errno)); + close(s); +#else + log_err("outgoing tcp: bind: %s", + wsa_strerror(WSAGetLastError())); + closesocket(s); +#endif + return 0; + } + log_addr(VERB_ALGO, "tcp bound to src", &pi->addr, pi->addrlen); + return 1; +} + +/** + * Use next free buffer to service a tcp query. + * Opens a TCP socket for the address family of the destination, lets + * pick_outgoing_tcp choose the source interface, starts a nonblocking + * connect and hands the socket to the comm point of the pending_tcp at + * the head of the free list. The packet is copied into the buffer of + * that comm point. + * @param w: the waiting tcp query, it becomes pend->query on success. + * @param pkt: the query packet to send. + * @param pkt_len: length of pkt in bytes. + * @return 0 on failure, 1 on success. + */ +static int +outnet_tcp_take_into_use(struct waiting_tcp* w, uint8_t* pkt, size_t pkt_len) +{ + struct pending_tcp* pend = w->outnet->tcp_free; + int s; + log_assert(pend); + log_assert(pkt); + log_assert(w->addrlen 
> 0); + /* open socket */ +#ifdef INET6 + if(addr_is_ip6(&w->addr, w->addrlen)) + s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP); + else +#endif + s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if(s == -1) { +#ifndef USE_WINSOCK + log_err_addr("outgoing tcp: socket", strerror(errno), + &w->addr, w->addrlen); +#else + log_err_addr("outgoing tcp: socket", + wsa_strerror(WSAGetLastError()), &w->addr, w->addrlen); +#endif + return 0; + } + if(!pick_outgoing_tcp(w, s)) + return 0; /* the socket was closed by pick_outgoing_tcp */ + + fd_set_nonblock(s); + if(connect(s, (struct sockaddr*)&w->addr, w->addrlen) == -1) { +#ifndef USE_WINSOCK +#ifdef EINPROGRESS + if(errno != EINPROGRESS) { +#else + if(1) { +#endif + if(tcp_connect_errno_needs_log( + (struct sockaddr*)&w->addr, w->addrlen)) + log_err_addr("outgoing tcp: connect", + strerror(errno), &w->addr, w->addrlen); + close(s); +#else /* USE_WINSOCK */ + if(WSAGetLastError() != WSAEINPROGRESS && + WSAGetLastError() != WSAEWOULDBLOCK) { + closesocket(s); +#endif + return 0; + } + } + if(w->outnet->sslctx && w->ssl_upstream) { + pend->c->ssl = outgoing_ssl_fd(w->outnet->sslctx, s); + if(!pend->c->ssl) { + pend->c->fd = s; + comm_point_close(pend->c); + return 0; + } +#ifdef USE_WINSOCK + comm_point_tcp_win_bio_cb(pend->c, pend->c->ssl); +#endif + pend->c->ssl_shake_state = comm_ssl_shake_write; + } + w->pkt = NULL; + w->next_waiting = (void*)pend; /* now refers to the pending_tcp in use */ + pend->id = LDNS_ID_WIRE(pkt); /* remembered, checked in outnet_tcp_cb */ + w->outnet->num_tcp_outgoing++; + w->outnet->tcp_free = pend->next_free; /* take pend off the free list */ + pend->next_free = NULL; + pend->query = w; + pend->c->repinfo.addrlen = w->addrlen; + memcpy(&pend->c->repinfo.addr, &w->addr, w->addrlen); + sldns_buffer_clear(pend->c->buffer); + sldns_buffer_write(pend->c->buffer, pkt, pkt_len); + sldns_buffer_flip(pend->c->buffer); + pend->c->tcp_is_reading = 0; + pend->c->tcp_byte_count = 0; + comm_point_start_listening(pend->c, s, -1); + return 1; +} + +/** + * See if buffers can be used to service TCP queries. + * While there is a free buffer and a waiting query, and the outnet does + * not want to quit, the first waiting query is taken off the wait list + * and taken into use. If that fails, the waiting entry is deleted and + * its callback is called with NETEVENT_CLOSED. + */ +static void +use_free_buffer(struct outside_network* outnet) +{ + 
struct waiting_tcp* w; + while(outnet->tcp_free && outnet->tcp_wait_first + && !outnet->want_to_quit) { + w = outnet->tcp_wait_first; + outnet->tcp_wait_first = w->next_waiting; + if(outnet->tcp_wait_last == w) + outnet->tcp_wait_last = NULL; + if(!outnet_tcp_take_into_use(w, w->pkt, w->pkt_len)) { + comm_point_callback_t* cb = w->cb; /* copied, w is deleted next */ + void* cb_arg = w->cb_arg; + waiting_tcp_delete(w); + fptr_ok(fptr_whitelist_pending_tcp(cb)); + (void)(*cb)(NULL, cb_arg, NETEVENT_CLOSED, NULL); + } + } +} + +/** decommission a tcp buffer, closes commpoint and frees waiting_tcp entry. + * The buffer is put back at the head of the free list and the waiting + * queries get a chance to use it. */ +static void +decomission_pending_tcp(struct outside_network* outnet, + struct pending_tcp* pend) +{ + if(pend->c->ssl) { +#ifdef HAVE_SSL + SSL_shutdown(pend->c->ssl); + SSL_free(pend->c->ssl); + pend->c->ssl = NULL; +#endif + } + comm_point_close(pend->c); + pend->next_free = outnet->tcp_free; + outnet->tcp_free = pend; + waiting_tcp_delete(pend->query); + pend->query = NULL; + use_free_buffer(outnet); +} +/** + * Callback for the comm point of a pending tcp query. + * Without an error, the reply has to be at least as long as an ID and + * its ID has to equal the pending id, otherwise the error is changed to + * NETEVENT_CLOSED. The callback of the query is then called with the + * resulting error and the pending tcp is decommissioned. + * @param c: the comm point with the reply buffer. + * @param arg: the struct pending_tcp. + * @param error: NETEVENT_NOERROR or an error value. + * @param reply_info: passed on to the callback of the query. + * @return 0 always. + */ +int +outnet_tcp_cb(struct comm_point* c, void* arg, int error, + struct comm_reply *reply_info) +{ + struct pending_tcp* pend = (struct pending_tcp*)arg; + struct outside_network* outnet = pend->query->outnet; /* fetched now, pend->query is deleted by the decommission below */ + verbose(VERB_ALGO, "outnettcp cb"); + if(error != NETEVENT_NOERROR) { + verbose(VERB_QUERY, "outnettcp got tcp error %d", error); + /* pass error below and exit */ + } else { + /* check ID */ + if(sldns_buffer_limit(c->buffer) < sizeof(uint16_t) || + LDNS_ID_WIRE(sldns_buffer_begin(c->buffer))!=pend->id) { + log_addr(VERB_QUERY, + "outnettcp: bad ID in reply, from:", + &pend->query->addr, pend->query->addrlen); + error = NETEVENT_CLOSED; + } + } + fptr_ok(fptr_whitelist_pending_tcp(pend->query->cb)); + (void)(*pend->query->cb)(c, pend->query->cb_arg, error, reply_info); + decomission_pending_tcp(outnet, pend); + return 0; +} + +/** lower use count on pc, see if it can be closed */ +static void +portcomm_loweruse(struct outside_network* outnet, struct port_comm* pc) +{ + struct port_if* 
pif; + pc->num_outstanding--; + if(pc->num_outstanding > 0) { + return; + } + /* close it and replace in unused list */ + verbose(VERB_ALGO, "close of port %d", pc->number); + comm_point_close(pc->cp); + pif = pc->pif; + log_assert(pif->inuse > 0); + pif->avail_ports[pif->avail_total - pif->inuse] = pc->number; + pif->inuse--; + pif->out[pc->index] = pif->out[pif->inuse]; + pif->out[pc->index]->index = pc->index; + pc->next = outnet->unused_fds; + outnet->unused_fds = pc; +} + +/** try to send waiting UDP queries */ +static void +outnet_send_wait_udp(struct outside_network* outnet) +{ + struct pending* pend; + /* process waiting queries */ + while(outnet->udp_wait_first && outnet->unused_fds + && !outnet->want_to_quit) { + pend = outnet->udp_wait_first; + outnet->udp_wait_first = pend->next_waiting; + if(!pend->next_waiting) outnet->udp_wait_last = NULL; + sldns_buffer_clear(outnet->udp_buff); + sldns_buffer_write(outnet->udp_buff, pend->pkt, pend->pkt_len); + sldns_buffer_flip(outnet->udp_buff); + free(pend->pkt); /* freeing now makes get_mem correct */ + pend->pkt = NULL; + pend->pkt_len = 0; + if(!randomize_and_send_udp(pend, outnet->udp_buff, + pend->timeout)) { + /* callback error on pending */ + if(pend->cb) { + fptr_ok(fptr_whitelist_pending_udp(pend->cb)); + (void)(*pend->cb)(outnet->unused_fds->cp, pend->cb_arg, + NETEVENT_CLOSED, NULL); + } + pending_delete(outnet, pend); + } + } +} + +int +outnet_udp_cb(struct comm_point* c, void* arg, int error, + struct comm_reply *reply_info) +{ + struct outside_network* outnet = (struct outside_network*)arg; + struct pending key; + struct pending* p; + verbose(VERB_ALGO, "answer cb"); + + if(error != NETEVENT_NOERROR) { + verbose(VERB_QUERY, "outnetudp got udp error %d", error); + return 0; + } + if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { + verbose(VERB_QUERY, "outnetudp udp too short"); + return 0; + } + log_assert(reply_info); + + /* setup lookup key */ + key.id = 
(unsigned)LDNS_ID_WIRE(sldns_buffer_begin(c->buffer)); + memcpy(&key.addr, &reply_info->addr, reply_info->addrlen); + key.addrlen = reply_info->addrlen; + verbose(VERB_ALGO, "Incoming reply id = %4.4x", key.id); + log_addr(VERB_ALGO, "Incoming reply addr =", + &reply_info->addr, reply_info->addrlen); + + /* find it, see if this thing is a valid query response */ + verbose(VERB_ALGO, "lookup size is %d entries", (int)outnet->pending->count); + p = (struct pending*)rbtree_search(outnet->pending, &key); + if(!p) { + verbose(VERB_QUERY, "received unwanted or unsolicited udp reply dropped."); + log_buf(VERB_ALGO, "dropped message", c->buffer); + outnet->unwanted_replies++; + if(outnet->unwanted_threshold && ++outnet->unwanted_total + >= outnet->unwanted_threshold) { + log_warn("unwanted reply total reached threshold (%u)" + " you may be under attack." + " defensive action: clearing the cache", + (unsigned)outnet->unwanted_threshold); + fptr_ok(fptr_whitelist_alloc_cleanup( + outnet->unwanted_action)); + (*outnet->unwanted_action)(outnet->unwanted_param); + outnet->unwanted_total = 0; + } + return 0; + } + + verbose(VERB_ALGO, "received udp reply."); + log_buf(VERB_ALGO, "udp message", c->buffer); + if(p->pc->cp != c) { + verbose(VERB_QUERY, "received reply id,addr on wrong port. " + "dropped."); + outnet->unwanted_replies++; + if(outnet->unwanted_threshold && ++outnet->unwanted_total + >= outnet->unwanted_threshold) { + log_warn("unwanted reply total reached threshold (%u)" + " you may be under attack." 
/**
 * Count how many IPv4 and IPv6 outgoing interfaces will be set up.
 * With no interface strings given, each enabled family counts as one.
 * Otherwise every string is classified with str_is_ip6() and counted
 * only if its family is enabled.
 * @param ifs: array of interface address strings.
 * @param num_ifs: number of entries in ifs.
 * @param do_ip4: nonzero if IPv4 is enabled.
 * @param do_ip6: nonzero if IPv6 is enabled.
 * @param num_ip4: result, number of IPv4 interfaces.
 * @param num_ip6: result, number of IPv6 interfaces.
 */
static void
calc_num46(char** ifs, int num_ifs, int do_ip4, int do_ip6,
	int* num_ip4, int* num_ip6)
{
	int idx;

	*num_ip4 = 0;
	*num_ip6 = 0;
	if(num_ifs <= 0) {
		/* no explicit list: one default interface per family */
		if(do_ip4)
			*num_ip4 = 1;
		if(do_ip6)
			*num_ip6 = 1;
		return;
	}
	for(idx = 0; idx < num_ifs; idx++) {
		int is6 = str_is_ip6(ifs[idx]);
		if(is6 && do_ip6)
			++*num_ip6;
		else if(!is6 && do_ip4)
			++*num_ip4;
	}
}
+ * But if the udpwaitlist exists, then we are struggling to + * keep up with demand for sockets, so do not wait, but service + * the customer (customer service more important than portICMPs) */ + if(outnet->delayclose && !outnet->udp_wait_first) { + p->cb = NULL; + p->timer->callback = &pending_udp_timer_delay_cb; + comm_timer_set(p->timer, &outnet->delay_tv); + return; + } + portcomm_loweruse(outnet, p->pc); + pending_delete(outnet, p); + outnet_send_wait_udp(outnet); +} + +/** create pending_tcp buffers */ +static int +create_pending_tcp(struct outside_network* outnet, size_t bufsize) +{ + size_t i; + if(outnet->num_tcp == 0) + return 1; /* no tcp needed, nothing to do */ + if(!(outnet->tcp_conns = (struct pending_tcp **)calloc( + outnet->num_tcp, sizeof(struct pending_tcp*)))) + return 0; + for(i=0; i<outnet->num_tcp; i++) { + if(!(outnet->tcp_conns[i] = (struct pending_tcp*)calloc(1, + sizeof(struct pending_tcp)))) + return 0; + outnet->tcp_conns[i]->next_free = outnet->tcp_free; + outnet->tcp_free = outnet->tcp_conns[i]; + outnet->tcp_conns[i]->c = comm_point_create_tcp_out( + outnet->base, bufsize, outnet_tcp_cb, + outnet->tcp_conns[i]); + if(!outnet->tcp_conns[i]->c) + return 0; + } + return 1; +} + +/** setup an outgoing interface, ready address */ +static int setup_if(struct port_if* pif, const char* addrstr, + int* avail, int numavail, size_t numfd) +{ + pif->avail_total = numavail; + pif->avail_ports = (int*)memdup(avail, (size_t)numavail*sizeof(int)); + if(!pif->avail_ports) + return 0; + if(!ipstrtoaddr(addrstr, UNBOUND_DNS_PORT, &pif->addr, &pif->addrlen)) + return 0; + pif->maxout = (int)numfd; + pif->inuse = 0; + pif->out = (struct port_comm**)calloc(numfd, + sizeof(struct port_comm*)); + if(!pif->out) + return 0; + return 1; +} + +struct outside_network* +outside_network_create(struct comm_base *base, size_t bufsize, + size_t num_ports, char** ifs, int num_ifs, int do_ip4, + int do_ip6, size_t num_tcp, struct infra_cache* infra, + struct 
ub_randstate* rnd, int use_caps_for_id, int* availports, + int numavailports, size_t unwanted_threshold, + void (*unwanted_action)(void*), void* unwanted_param, int do_udp, + void* sslctx, int delayclose, struct dt_env* dtenv) +{ + struct outside_network* outnet = (struct outside_network*) + calloc(1, sizeof(struct outside_network)); + size_t k; + if(!outnet) { + log_err("malloc failed"); + return NULL; + } + comm_base_timept(base, &outnet->now_secs, &outnet->now_tv); + outnet->base = base; + outnet->num_tcp = num_tcp; + outnet->num_tcp_outgoing = 0; + outnet->infra = infra; + outnet->rnd = rnd; + outnet->sslctx = sslctx; +#ifdef USE_DNSTAP + outnet->dtenv = dtenv; +#else + (void)dtenv; +#endif + outnet->svcd_overhead = 0; + outnet->want_to_quit = 0; + outnet->unwanted_threshold = unwanted_threshold; + outnet->unwanted_action = unwanted_action; + outnet->unwanted_param = unwanted_param; + outnet->use_caps_for_id = use_caps_for_id; + outnet->do_udp = do_udp; +#ifndef S_SPLINT_S + if(delayclose) { + outnet->delayclose = 1; + outnet->delay_tv.tv_sec = delayclose/1000; + outnet->delay_tv.tv_usec = (delayclose%1000)*1000; + } +#endif + if(numavailports == 0) { + log_err("no outgoing ports available"); + outside_network_delete(outnet); + return NULL; + } +#ifndef INET6 + do_ip6 = 0; +#endif + calc_num46(ifs, num_ifs, do_ip4, do_ip6, + &outnet->num_ip4, &outnet->num_ip6); + if(outnet->num_ip4 != 0) { + if(!(outnet->ip4_ifs = (struct port_if*)calloc( + (size_t)outnet->num_ip4, sizeof(struct port_if)))) { + log_err("malloc failed"); + outside_network_delete(outnet); + return NULL; + } + } + if(outnet->num_ip6 != 0) { + if(!(outnet->ip6_ifs = (struct port_if*)calloc( + (size_t)outnet->num_ip6, sizeof(struct port_if)))) { + log_err("malloc failed"); + outside_network_delete(outnet); + return NULL; + } + } + if( !(outnet->udp_buff = sldns_buffer_new(bufsize)) || + !(outnet->pending = rbtree_create(pending_cmp)) || + !(outnet->serviced = rbtree_create(serviced_cmp)) || + 
!create_pending_tcp(outnet, bufsize)) { + log_err("malloc failed"); + outside_network_delete(outnet); + return NULL; + } + + /* allocate commpoints */ + for(k=0; k<num_ports; k++) { + struct port_comm* pc; + pc = (struct port_comm*)calloc(1, sizeof(*pc)); + if(!pc) { + log_err("malloc failed"); + outside_network_delete(outnet); + return NULL; + } + pc->cp = comm_point_create_udp(outnet->base, -1, + outnet->udp_buff, outnet_udp_cb, outnet); + if(!pc->cp) { + log_err("malloc failed"); + free(pc); + outside_network_delete(outnet); + return NULL; + } + pc->next = outnet->unused_fds; + outnet->unused_fds = pc; + } + + /* allocate interfaces */ + if(num_ifs == 0) { + if(do_ip4 && !setup_if(&outnet->ip4_ifs[0], "0.0.0.0", + availports, numavailports, num_ports)) { + log_err("malloc failed"); + outside_network_delete(outnet); + return NULL; + } + if(do_ip6 && !setup_if(&outnet->ip6_ifs[0], "::", + availports, numavailports, num_ports)) { + log_err("malloc failed"); + outside_network_delete(outnet); + return NULL; + } + } else { + size_t done_4 = 0, done_6 = 0; + int i; + for(i=0; i<num_ifs; i++) { + if(str_is_ip6(ifs[i]) && do_ip6) { + if(!setup_if(&outnet->ip6_ifs[done_6], ifs[i], + availports, numavailports, num_ports)){ + log_err("malloc failed"); + outside_network_delete(outnet); + return NULL; + } + done_6++; + } + if(!str_is_ip6(ifs[i]) && do_ip4) { + if(!setup_if(&outnet->ip4_ifs[done_4], ifs[i], + availports, numavailports, num_ports)){ + log_err("malloc failed"); + outside_network_delete(outnet); + return NULL; + } + done_4++; + } + } + } + return outnet; +} + +/** helper pending delete */ +static void +pending_node_del(rbnode_t* node, void* arg) +{ + struct pending* pend = (struct pending*)node; + struct outside_network* outnet = (struct outside_network*)arg; + pending_delete(outnet, pend); +} + +/** helper serviced delete */ +static void +serviced_node_del(rbnode_t* node, void* ATTR_UNUSED(arg)) +{ + struct serviced_query* sq = (struct serviced_query*)node; + 
struct service_callback* p = sq->cblist, *np; + free(sq->qbuf); + free(sq->zone); + while(p) { + np = p->next; + free(p); + p = np; + } + free(sq); +} + +void +outside_network_quit_prepare(struct outside_network* outnet) +{ + if(!outnet) + return; + /* prevent queued items from being sent */ + outnet->want_to_quit = 1; +} + +void +outside_network_delete(struct outside_network* outnet) +{ + if(!outnet) + return; + outnet->want_to_quit = 1; + /* check every element, since we can be called on malloc error */ + if(outnet->pending) { + /* free pending elements, but do no unlink from tree. */ + traverse_postorder(outnet->pending, pending_node_del, NULL); + free(outnet->pending); + } + if(outnet->serviced) { + traverse_postorder(outnet->serviced, serviced_node_del, NULL); + free(outnet->serviced); + } + if(outnet->udp_buff) + sldns_buffer_free(outnet->udp_buff); + if(outnet->unused_fds) { + struct port_comm* p = outnet->unused_fds, *np; + while(p) { + np = p->next; + comm_point_delete(p->cp); + free(p); + p = np; + } + outnet->unused_fds = NULL; + } + if(outnet->ip4_ifs) { + int i, k; + for(i=0; i<outnet->num_ip4; i++) { + for(k=0; k<outnet->ip4_ifs[i].inuse; k++) { + struct port_comm* pc = outnet->ip4_ifs[i]. + out[k]; + comm_point_delete(pc->cp); + free(pc); + } + free(outnet->ip4_ifs[i].avail_ports); + free(outnet->ip4_ifs[i].out); + } + free(outnet->ip4_ifs); + } + if(outnet->ip6_ifs) { + int i, k; + for(i=0; i<outnet->num_ip6; i++) { + for(k=0; k<outnet->ip6_ifs[i].inuse; k++) { + struct port_comm* pc = outnet->ip6_ifs[i]. 
+ out[k]; + comm_point_delete(pc->cp); + free(pc); + } + free(outnet->ip6_ifs[i].avail_ports); + free(outnet->ip6_ifs[i].out); + } + free(outnet->ip6_ifs); + } + if(outnet->tcp_conns) { + size_t i; + for(i=0; i<outnet->num_tcp; i++) + if(outnet->tcp_conns[i]) { + comm_point_delete(outnet->tcp_conns[i]->c); + waiting_tcp_delete(outnet->tcp_conns[i]->query); + free(outnet->tcp_conns[i]); + } + free(outnet->tcp_conns); + } + if(outnet->tcp_wait_first) { + struct waiting_tcp* p = outnet->tcp_wait_first, *np; + while(p) { + np = p->next_waiting; + waiting_tcp_delete(p); + p = np; + } + } + if(outnet->udp_wait_first) { + struct pending* p = outnet->udp_wait_first, *np; + while(p) { + np = p->next_waiting; + pending_delete(NULL, p); + p = np; + } + } + free(outnet); +} + +void +pending_delete(struct outside_network* outnet, struct pending* p) +{ + if(!p) + return; + if(outnet && outnet->udp_wait_first && + (p->next_waiting || p == outnet->udp_wait_last) ) { + /* delete from waiting list, if it is in the waiting list */ + struct pending* prev = NULL, *x = outnet->udp_wait_first; + while(x && x != p) { + prev = x; + x = x->next_waiting; + } + if(x) { + log_assert(x == p); + if(prev) + prev->next_waiting = p->next_waiting; + else outnet->udp_wait_first = p->next_waiting; + if(outnet->udp_wait_last == p) + outnet->udp_wait_last = prev; + } + } + if(outnet) { + (void)rbtree_delete(outnet->pending, p->node.key); + } + if(p->timer) + comm_timer_delete(p->timer); + free(p->pkt); + free(p); +} + +/** + * Try to open a UDP socket for outgoing communication. + * Sets sockets options as needed. + * @param addr: socket address. + * @param addrlen: length of address. + * @param port: port override for addr. + * @param inuse: if -1 is returned, this bool means the port was in use. 
+ * @return fd or -1 + */ +static int +udp_sockport(struct sockaddr_storage* addr, socklen_t addrlen, int port, + int* inuse) +{ + int fd, noproto; + if(addr_is_ip6(addr, addrlen)) { + struct sockaddr_in6* sa = (struct sockaddr_in6*)addr; + sa->sin6_port = (in_port_t)htons((uint16_t)port); + fd = create_udp_sock(AF_INET6, SOCK_DGRAM, + (struct sockaddr*)addr, addrlen, 1, inuse, &noproto, + 0, 0, 0, NULL); + } else { + struct sockaddr_in* sa = (struct sockaddr_in*)addr; + sa->sin_port = (in_port_t)htons((uint16_t)port); + fd = create_udp_sock(AF_INET, SOCK_DGRAM, + (struct sockaddr*)addr, addrlen, 1, inuse, &noproto, + 0, 0, 0, NULL); + } + return fd; +} + +/** Select random ID */ +static int +select_id(struct outside_network* outnet, struct pending* pend, + sldns_buffer* packet) +{ + int id_tries = 0; + pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + LDNS_ID_SET(sldns_buffer_begin(packet), pend->id); + + /* insert in tree */ + pend->node.key = pend; + while(!rbtree_insert(outnet->pending, &pend->node)) { + /* change ID to avoid collision */ + pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + LDNS_ID_SET(sldns_buffer_begin(packet), pend->id); + id_tries++; + if(id_tries == MAX_ID_RETRY) { + pend->id=99999; /* non existant ID */ + log_err("failed to generate unique ID, drop msg"); + return 0; + } + } + verbose(VERB_ALGO, "inserted new pending reply id=%4.4x", pend->id); + return 1; +} + +/** Select random interface and port */ +static int +select_ifport(struct outside_network* outnet, struct pending* pend, + int num_if, struct port_if* ifs) +{ + int my_if, my_port, fd, portno, inuse, tries=0; + struct port_if* pif; + /* randomly select interface and port */ + if(num_if == 0) { + verbose(VERB_QUERY, "Need to send query but have no " + "outgoing interfaces of that family"); + return 0; + } + log_assert(outnet->unused_fds); + tries = 0; + while(1) { + my_if = ub_random_max(outnet->rnd, num_if); + pif = &ifs[my_if]; + my_port = 
ub_random_max(outnet->rnd, pif->avail_total); + if(my_port < pif->inuse) { + /* port already open */ + pend->pc = pif->out[my_port]; + verbose(VERB_ALGO, "using UDP if=%d port=%d", + my_if, pend->pc->number); + break; + } + /* try to open new port, if fails, loop to try again */ + log_assert(pif->inuse < pif->maxout); + portno = pif->avail_ports[my_port - pif->inuse]; + fd = udp_sockport(&pif->addr, pif->addrlen, portno, &inuse); + if(fd == -1 && !inuse) { + /* nonrecoverable error making socket */ + return 0; + } + if(fd != -1) { + verbose(VERB_ALGO, "opened UDP if=%d port=%d", + my_if, portno); + /* grab fd */ + pend->pc = outnet->unused_fds; + outnet->unused_fds = pend->pc->next; + + /* setup portcomm */ + pend->pc->next = NULL; + pend->pc->number = portno; + pend->pc->pif = pif; + pend->pc->index = pif->inuse; + pend->pc->num_outstanding = 0; + comm_point_start_listening(pend->pc->cp, fd, -1); + + /* grab port in interface */ + pif->out[pif->inuse] = pend->pc; + pif->avail_ports[my_port - pif->inuse] = + pif->avail_ports[pif->avail_total-pif->inuse-1]; + pif->inuse++; + break; + } + /* failed, already in use */ + verbose(VERB_QUERY, "port %d in use, trying another", portno); + tries++; + if(tries == MAX_PORT_RETRY) { + log_err("failed to find an open port, drop msg"); + return 0; + } + } + log_assert(pend->pc); + pend->pc->num_outstanding++; + + return 1; +} + +static int +randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, int timeout) +{ + struct timeval tv; + struct outside_network* outnet = pend->sq->outnet; + + /* select id */ + if(!select_id(outnet, pend, packet)) { + return 0; + } + + /* select src_if, port */ + if(addr_is_ip6(&pend->addr, pend->addrlen)) { + if(!select_ifport(outnet, pend, + outnet->num_ip6, outnet->ip6_ifs)) + return 0; + } else { + if(!select_ifport(outnet, pend, + outnet->num_ip4, outnet->ip4_ifs)) + return 0; + } + log_assert(pend->pc && pend->pc->cp); + + /* send it over the commlink */ + 
if(!comm_point_send_udp_msg(pend->pc->cp, packet, + (struct sockaddr*)&pend->addr, pend->addrlen)) { + portcomm_loweruse(outnet, pend->pc); + return 0; + } + + /* system calls to set timeout after sending UDP to make roundtrip + smaller. */ +#ifndef S_SPLINT_S + tv.tv_sec = timeout/1000; + tv.tv_usec = (timeout%1000)*1000; +#endif + comm_timer_set(pend->timer, &tv); + +#ifdef USE_DNSTAP + if(outnet->dtenv && + (outnet->dtenv->log_resolver_query_messages || + outnet->dtenv->log_forwarder_query_messages)) + dt_msg_send_outside_query(outnet->dtenv, &pend->addr, comm_udp, + pend->sq->zone, pend->sq->zonelen, packet); +#endif + return 1; +} + +struct pending* +pending_udp_query(struct serviced_query* sq, struct sldns_buffer* packet, + int timeout, comm_point_callback_t* cb, void* cb_arg) +{ + struct pending* pend = (struct pending*)calloc(1, sizeof(*pend)); + if(!pend) return NULL; + pend->outnet = sq->outnet; + pend->sq = sq; + pend->addrlen = sq->addrlen; + memmove(&pend->addr, &sq->addr, sq->addrlen); + pend->cb = cb; + pend->cb_arg = cb_arg; + pend->node.key = pend; + pend->timer = comm_timer_create(sq->outnet->base, pending_udp_timer_cb, + pend); + if(!pend->timer) { + free(pend); + return NULL; + } + + if(sq->outnet->unused_fds == NULL) { + /* no unused fd, cannot create a new port (randomly) */ + verbose(VERB_ALGO, "no fds available, udp query waiting"); + pend->timeout = timeout; + pend->pkt_len = sldns_buffer_limit(packet); + pend->pkt = (uint8_t*)memdup(sldns_buffer_begin(packet), + pend->pkt_len); + if(!pend->pkt) { + comm_timer_delete(pend->timer); + free(pend); + return NULL; + } + /* put at end of waiting list */ + if(sq->outnet->udp_wait_last) + sq->outnet->udp_wait_last->next_waiting = pend; + else + sq->outnet->udp_wait_first = pend; + sq->outnet->udp_wait_last = pend; + return pend; + } + if(!randomize_and_send_udp(pend, packet, timeout)) { + pending_delete(sq->outnet, pend); + return NULL; + } + return pend; +} + +void +outnet_tcptimer(void* arg) +{ + 
struct waiting_tcp* w = (struct waiting_tcp*)arg; + struct outside_network* outnet = w->outnet; + comm_point_callback_t* cb; + void* cb_arg; + if(w->pkt) { + /* it is on the waiting list */ + waiting_list_remove(outnet, w); + } else { + /* it was in use */ + struct pending_tcp* pend=(struct pending_tcp*)w->next_waiting; + comm_point_close(pend->c); + pend->query = NULL; + pend->next_free = outnet->tcp_free; + outnet->tcp_free = pend; + } + cb = w->cb; + cb_arg = w->cb_arg; + waiting_tcp_delete(w); + fptr_ok(fptr_whitelist_pending_tcp(cb)); + (void)(*cb)(NULL, cb_arg, NETEVENT_TIMEOUT, NULL); + use_free_buffer(outnet); +} + +struct waiting_tcp* +pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, + int timeout, comm_point_callback_t* callback, void* callback_arg) +{ + struct pending_tcp* pend = sq->outnet->tcp_free; + struct waiting_tcp* w; + struct timeval tv; + uint16_t id; + /* if no buffer is free allocate space to store query */ + w = (struct waiting_tcp*)malloc(sizeof(struct waiting_tcp) + + (pend?0:sldns_buffer_limit(packet))); + if(!w) { + return NULL; + } + if(!(w->timer = comm_timer_create(sq->outnet->base, outnet_tcptimer, w))) { + free(w); + return NULL; + } + w->pkt = NULL; + w->pkt_len = 0; + id = ((unsigned)ub_random(sq->outnet->rnd)>>8) & 0xffff; + LDNS_ID_SET(sldns_buffer_begin(packet), id); + memcpy(&w->addr, &sq->addr, sq->addrlen); + w->addrlen = sq->addrlen; + w->outnet = sq->outnet; + w->cb = callback; + w->cb_arg = callback_arg; + w->ssl_upstream = sq->ssl_upstream; +#ifndef S_SPLINT_S + tv.tv_sec = timeout; + tv.tv_usec = 0; +#endif + comm_timer_set(w->timer, &tv); + if(pend) { + /* we have a buffer available right now */ + if(!outnet_tcp_take_into_use(w, sldns_buffer_begin(packet), + sldns_buffer_limit(packet))) { + waiting_tcp_delete(w); + return NULL; + } +#ifdef USE_DNSTAP + if(sq->outnet->dtenv && + (sq->outnet->dtenv->log_resolver_query_messages || + sq->outnet->dtenv->log_forwarder_query_messages)) + 
dt_msg_send_outside_query(sq->outnet->dtenv, &sq->addr, + comm_tcp, sq->zone, sq->zonelen, packet); +#endif + } else { + /* queue up */ + w->pkt = (uint8_t*)w + sizeof(struct waiting_tcp); + w->pkt_len = sldns_buffer_limit(packet); + memmove(w->pkt, sldns_buffer_begin(packet), w->pkt_len); + w->next_waiting = NULL; + if(sq->outnet->tcp_wait_last) + sq->outnet->tcp_wait_last->next_waiting = w; + else sq->outnet->tcp_wait_first = w; + sq->outnet->tcp_wait_last = w; + } + return w; +} + +/** create query for serviced queries */ +static void +serviced_gen_query(sldns_buffer* buff, uint8_t* qname, size_t qnamelen, + uint16_t qtype, uint16_t qclass, uint16_t flags) +{ + sldns_buffer_clear(buff); + /* skip id */ + sldns_buffer_write_u16(buff, flags); + sldns_buffer_write_u16(buff, 1); /* qdcount */ + sldns_buffer_write_u16(buff, 0); /* ancount */ + sldns_buffer_write_u16(buff, 0); /* nscount */ + sldns_buffer_write_u16(buff, 0); /* arcount */ + sldns_buffer_write(buff, qname, qnamelen); + sldns_buffer_write_u16(buff, qtype); + sldns_buffer_write_u16(buff, qclass); + sldns_buffer_flip(buff); +} + +/** lookup serviced query in serviced query rbtree */ +static struct serviced_query* +lookup_serviced(struct outside_network* outnet, sldns_buffer* buff, int dnssec, + struct sockaddr_storage* addr, socklen_t addrlen) +{ + struct serviced_query key; + key.node.key = &key; + key.qbuf = sldns_buffer_begin(buff); + key.qbuflen = sldns_buffer_limit(buff); + key.dnssec = dnssec; + memcpy(&key.addr, addr, addrlen); + key.addrlen = addrlen; + key.outnet = outnet; + return (struct serviced_query*)rbtree_search(outnet->serviced, &key); +} + +/** Create new serviced entry */ +static struct serviced_query* +serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec, + int want_dnssec, int nocaps, int tcp_upstream, int ssl_upstream, + struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone, + size_t zonelen, int qtype) +{ + struct serviced_query* sq = (struct 
serviced_query*)malloc(sizeof(*sq)); +#ifdef UNBOUND_DEBUG + rbnode_t* ins; +#endif + if(!sq) + return NULL; + sq->node.key = sq; + sq->qbuf = memdup(sldns_buffer_begin(buff), sldns_buffer_limit(buff)); + if(!sq->qbuf) { + free(sq); + return NULL; + } + sq->qbuflen = sldns_buffer_limit(buff); + sq->zone = memdup(zone, zonelen); + if(!sq->zone) { + free(sq->qbuf); + free(sq); + return NULL; + } + sq->zonelen = zonelen; + sq->qtype = qtype; + sq->dnssec = dnssec; + sq->want_dnssec = want_dnssec; + sq->nocaps = nocaps; + sq->tcp_upstream = tcp_upstream; + sq->ssl_upstream = ssl_upstream; + memcpy(&sq->addr, addr, addrlen); + sq->addrlen = addrlen; + sq->outnet = outnet; + sq->cblist = NULL; + sq->pending = NULL; + sq->status = serviced_initial; + sq->retry = 0; + sq->to_be_deleted = 0; +#ifdef UNBOUND_DEBUG + ins = +#else + (void) +#endif + rbtree_insert(outnet->serviced, &sq->node); + log_assert(ins != NULL); /* must not be already present */ + return sq; +} + +/** remove waiting tcp from the outnet waiting list */ +static void +waiting_list_remove(struct outside_network* outnet, struct waiting_tcp* w) +{ + struct waiting_tcp* p = outnet->tcp_wait_first, *prev = NULL; + while(p) { + if(p == w) { + /* remove w */ + if(prev) + prev->next_waiting = w->next_waiting; + else outnet->tcp_wait_first = w->next_waiting; + if(outnet->tcp_wait_last == w) + outnet->tcp_wait_last = prev; + return; + } + prev = p; + p = p->next_waiting; + } +} + +/** cleanup serviced query entry */ +static void +serviced_delete(struct serviced_query* sq) +{ + if(sq->pending) { + /* clear up the pending query */ + if(sq->status == serviced_query_UDP_EDNS || + sq->status == serviced_query_UDP || + sq->status == serviced_query_PROBE_EDNS || + sq->status == serviced_query_UDP_EDNS_FRAG || + sq->status == serviced_query_UDP_EDNS_fallback) { + struct pending* p = (struct pending*)sq->pending; + if(p->pc) + portcomm_loweruse(sq->outnet, p->pc); + pending_delete(sq->outnet, p); + /* this call can cause 
reentrant calls back into the + * mesh */ + outnet_send_wait_udp(sq->outnet); + } else { + struct waiting_tcp* p = (struct waiting_tcp*) + sq->pending; + if(p->pkt == NULL) { + decomission_pending_tcp(sq->outnet, + (struct pending_tcp*)p->next_waiting); + } else { + waiting_list_remove(sq->outnet, p); + waiting_tcp_delete(p); + } + } + } + /* does not delete from tree, caller has to do that */ + serviced_node_del(&sq->node, NULL); +} + +/** perturb a dname capitalization randomly */ +static void +serviced_perturb_qname(struct ub_randstate* rnd, uint8_t* qbuf, size_t len) +{ + uint8_t lablen; + uint8_t* d = qbuf + 10; + long int random = 0; + int bits = 0; + log_assert(len >= 10 + 5 /* offset qname, root, qtype, qclass */); + lablen = *d++; + while(lablen) { + while(lablen--) { + /* only perturb A-Z, a-z */ + if(isalpha((int)*d)) { + /* get a random bit */ + if(bits == 0) { + random = ub_random(rnd); + bits = 30; + } + if(random & 0x1) { + *d = (uint8_t)toupper((int)*d); + } else { + *d = (uint8_t)tolower((int)*d); + } + random >>= 1; + bits--; + } + d++; + } + lablen = *d++; + } + if(verbosity >= VERB_ALGO) { + char buf[LDNS_MAX_DOMAINLEN+1]; + dname_str(qbuf+10, buf); + verbose(VERB_ALGO, "qname perturbed to %s", buf); + } +} + +/** put serviced query into a buffer */ +static void +serviced_encode(struct serviced_query* sq, sldns_buffer* buff, int with_edns) +{ + /* if we are using 0x20 bits for ID randomness, perturb them */ + if(sq->outnet->use_caps_for_id && !sq->nocaps) { + serviced_perturb_qname(sq->outnet->rnd, sq->qbuf, sq->qbuflen); + } + /* generate query */ + sldns_buffer_clear(buff); + sldns_buffer_write_u16(buff, 0); /* id placeholder */ + sldns_buffer_write(buff, sq->qbuf, sq->qbuflen); + sldns_buffer_flip(buff); + if(with_edns) { + /* add edns section */ + struct edns_data edns; + edns.edns_present = 1; + edns.ext_rcode = 0; + edns.edns_version = EDNS_ADVERTISED_VERSION; + if(sq->status == serviced_query_UDP_EDNS_FRAG) { + if(addr_is_ip6(&sq->addr, 
sq->addrlen)) { + if(EDNS_FRAG_SIZE_IP6 < EDNS_ADVERTISED_SIZE) + edns.udp_size = EDNS_FRAG_SIZE_IP6; + else edns.udp_size = EDNS_ADVERTISED_SIZE; + } else { + if(EDNS_FRAG_SIZE_IP4 < EDNS_ADVERTISED_SIZE) + edns.udp_size = EDNS_FRAG_SIZE_IP4; + else edns.udp_size = EDNS_ADVERTISED_SIZE; + } + } else { + edns.udp_size = EDNS_ADVERTISED_SIZE; + } + edns.bits = 0; + if(sq->dnssec & EDNS_DO) + edns.bits = EDNS_DO; + if(sq->dnssec & BIT_CD) + LDNS_CD_SET(sldns_buffer_begin(buff)); + attach_edns_record(buff, &edns); + } +} + +/** + * Perform serviced query UDP sending operation. + * Sends UDP with EDNS, unless infra host marked non EDNS. + * @param sq: query to send. + * @param buff: buffer scratch space. + * @return 0 on error. + */ +static int +serviced_udp_send(struct serviced_query* sq, sldns_buffer* buff) +{ + int rtt, vs; + uint8_t edns_lame_known; + time_t now = *sq->outnet->now_secs; + + if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone, + sq->zonelen, now, &vs, &edns_lame_known, &rtt)) + return 0; + sq->last_rtt = rtt; + verbose(VERB_ALGO, "EDNS lookup known=%d vs=%d", edns_lame_known, vs); + if(sq->status == serviced_initial) { + if(edns_lame_known == 0 && rtt > 5000 && rtt < 10001) { + /* perform EDNS lame probe - check if server is + * EDNS lame (EDNS queries to it are dropped) */ + verbose(VERB_ALGO, "serviced query: send probe to see " + " if use of EDNS causes timeouts"); + /* even 700 msec may be too small */ + rtt = 1000; + sq->status = serviced_query_PROBE_EDNS; + } else if(vs != -1) { + sq->status = serviced_query_UDP_EDNS; + } else { + sq->status = serviced_query_UDP; + } + } + serviced_encode(sq, buff, (sq->status == serviced_query_UDP_EDNS) || + (sq->status == serviced_query_UDP_EDNS_FRAG)); + sq->last_sent_time = *sq->outnet->now_tv; + sq->edns_lame_known = (int)edns_lame_known; + verbose(VERB_ALGO, "serviced query UDP timeout=%d msec", rtt); + sq->pending = pending_udp_query(sq, buff, rtt, + serviced_udp_callback, sq); + 
if(!sq->pending) + return 0; + return 1; +} + +/** check that perturbed qname is identical */ +static int +serviced_check_qname(sldns_buffer* pkt, uint8_t* qbuf, size_t qbuflen) +{ + uint8_t* d1 = sldns_buffer_at(pkt, 12); + uint8_t* d2 = qbuf+10; + uint8_t len1, len2; + int count = 0; + log_assert(qbuflen >= 15 /* 10 header, root, type, class */); + len1 = *d1++; + len2 = *d2++; + if(sldns_buffer_limit(pkt) < 12+1+4) /* packet too small for qname */ + return 0; + while(len1 != 0 || len2 != 0) { + if(LABEL_IS_PTR(len1)) { + d1 = sldns_buffer_at(pkt, PTR_OFFSET(len1, *d1)); + if(d1 >= sldns_buffer_at(pkt, sldns_buffer_limit(pkt))) + return 0; + len1 = *d1++; + if(count++ > MAX_COMPRESS_PTRS) + return 0; + continue; + } + if(d2 > qbuf+qbuflen) + return 0; + if(len1 != len2) + return 0; + if(len1 > LDNS_MAX_LABELLEN) + return 0; + log_assert(len1 <= LDNS_MAX_LABELLEN); + log_assert(len2 <= LDNS_MAX_LABELLEN); + log_assert(len1 == len2 && len1 != 0); + /* compare the labels - bitwise identical */ + if(memcmp(d1, d2, len1) != 0) + return 0; + d1 += len1; + d2 += len2; + len1 = *d1++; + len2 = *d2++; + } + return 1; +} + +/** call the callbacks for a serviced query */ +static void +serviced_callbacks(struct serviced_query* sq, int error, struct comm_point* c, + struct comm_reply* rep) +{ + struct service_callback* p; + int dobackup = (sq->cblist && sq->cblist->next); /* >1 cb*/ + uint8_t *backup_p = NULL; + size_t backlen = 0; +#ifdef UNBOUND_DEBUG + rbnode_t* rem = +#else + (void) +#endif + /* remove from tree, and schedule for deletion, so that callbacks + * can safely deregister themselves and even create new serviced + * queries that are identical to this one. 
*/ + rbtree_delete(sq->outnet->serviced, sq); + log_assert(rem); /* should have been present */ + sq->to_be_deleted = 1; + verbose(VERB_ALGO, "svcd callbacks start"); + if(sq->outnet->use_caps_for_id && error == NETEVENT_NOERROR && c) { + /* noerror and nxdomain must have a qname in reply */ + if(sldns_buffer_read_u16_at(c->buffer, 4) == 0 && + (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) + == LDNS_RCODE_NOERROR || + LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) + == LDNS_RCODE_NXDOMAIN)) { + verbose(VERB_DETAIL, "no qname in reply to check 0x20ID"); + log_addr(VERB_DETAIL, "from server", + &sq->addr, sq->addrlen); + log_buf(VERB_DETAIL, "for packet", c->buffer); + error = NETEVENT_CLOSED; + c = NULL; + } else if(sldns_buffer_read_u16_at(c->buffer, 4) > 0 && + !serviced_check_qname(c->buffer, sq->qbuf, + sq->qbuflen)) { + verbose(VERB_DETAIL, "wrong 0x20-ID in reply qname"); + log_addr(VERB_DETAIL, "from server", + &sq->addr, sq->addrlen); + log_buf(VERB_DETAIL, "for packet", c->buffer); + error = NETEVENT_CAPSFAIL; + /* and cleanup too */ + pkt_dname_tolower(c->buffer, + sldns_buffer_at(c->buffer, 12)); + } else { + verbose(VERB_ALGO, "good 0x20-ID in reply qname"); + /* cleanup caps, prettier cache contents. */ + pkt_dname_tolower(c->buffer, + sldns_buffer_at(c->buffer, 12)); + } + } + if(dobackup && c) { + /* make a backup of the query, since the querystate processing + * may send outgoing queries that overwrite the buffer. + * use secondary buffer to store the query. 
+ * This is a data copy, but faster than packet to server */ + backlen = sldns_buffer_limit(c->buffer); + backup_p = memdup(sldns_buffer_begin(c->buffer), backlen); + if(!backup_p) { + log_err("malloc failure in serviced query callbacks"); + error = NETEVENT_CLOSED; + c = NULL; + } + sq->outnet->svcd_overhead = backlen; + } + /* test the actual sq->cblist, because the next elem could be deleted*/ + while((p=sq->cblist) != NULL) { + sq->cblist = p->next; /* remove this element */ + if(dobackup && c) { + sldns_buffer_clear(c->buffer); + sldns_buffer_write(c->buffer, backup_p, backlen); + sldns_buffer_flip(c->buffer); + } + fptr_ok(fptr_whitelist_serviced_query(p->cb)); + (void)(*p->cb)(c, p->cb_arg, error, rep); + free(p); + } + if(backup_p) { + free(backup_p); + sq->outnet->svcd_overhead = 0; + } + verbose(VERB_ALGO, "svcd callbacks end"); + log_assert(sq->cblist == NULL); + serviced_delete(sq); +} + +int +serviced_tcp_callback(struct comm_point* c, void* arg, int error, + struct comm_reply* rep) +{ + struct serviced_query* sq = (struct serviced_query*)arg; + struct comm_reply r2; + sq->pending = NULL; /* removed after this callback */ + if(error != NETEVENT_NOERROR) + log_addr(VERB_QUERY, "tcp error for address", + &sq->addr, sq->addrlen); + if(error==NETEVENT_NOERROR) + infra_update_tcp_works(sq->outnet->infra, &sq->addr, + sq->addrlen, sq->zone, sq->zonelen); +#ifdef USE_DNSTAP + if(sq->outnet->dtenv && + (sq->outnet->dtenv->log_resolver_response_messages || + sq->outnet->dtenv->log_forwarder_response_messages)) + dt_msg_send_outside_response(sq->outnet->dtenv, &sq->addr, + c->type, sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen, + &sq->last_sent_time, sq->outnet->now_tv, c->buffer); +#endif + if(error==NETEVENT_NOERROR && sq->status == serviced_query_TCP_EDNS && + (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == + LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(sldns_buffer_begin( + c->buffer)) == LDNS_RCODE_NOTIMPL) ) { + /* attempt to fallback to nonEDNS */ + sq->status 
= serviced_query_TCP_EDNS_fallback; + serviced_tcp_initiate(sq, c->buffer); + return 0; + } else if(error==NETEVENT_NOERROR && + sq->status == serviced_query_TCP_EDNS_fallback && + (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == + LDNS_RCODE_NOERROR || LDNS_RCODE_WIRE( + sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NXDOMAIN + || LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) + == LDNS_RCODE_YXDOMAIN)) { + /* the fallback produced a result that looks promising, note + * that this server should be approached without EDNS */ + /* only store noEDNS in cache if domain is noDNSSEC */ + if(!sq->want_dnssec) + if(!infra_edns_update(sq->outnet->infra, &sq->addr, + sq->addrlen, sq->zone, sq->zonelen, -1, + *sq->outnet->now_secs)) + log_err("Out of memory caching no edns for host"); + sq->status = serviced_query_TCP; + } + if(sq->tcp_upstream || sq->ssl_upstream) { + struct timeval now = *sq->outnet->now_tv; + if(now.tv_sec > sq->last_sent_time.tv_sec || + (now.tv_sec == sq->last_sent_time.tv_sec && + now.tv_usec > sq->last_sent_time.tv_usec)) { + /* convert from microseconds to milliseconds */ + int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000 + + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000; + verbose(VERB_ALGO, "measured TCP-time at %d msec", roundtime); + log_assert(roundtime >= 0); + /* only store if less then AUTH_TIMEOUT seconds, it could be + * huge due to system-hibernated and we woke up */ + if(roundtime < TCP_AUTH_QUERY_TIMEOUT*1000) { + if(!infra_rtt_update(sq->outnet->infra, &sq->addr, + sq->addrlen, sq->zone, sq->zonelen, sq->qtype, + roundtime, sq->last_rtt, (time_t)now.tv_sec)) + log_err("out of memory noting rtt."); + } + } + } + /* insert address into reply info */ + if(!rep) { + /* create one if there isn't (on errors) */ + rep = &r2; + r2.c = c; + } + memcpy(&rep->addr, &sq->addr, sq->addrlen); + rep->addrlen = sq->addrlen; + serviced_callbacks(sq, error, c, rep); + return 0; +} + +static void 
+serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff) +{ + verbose(VERB_ALGO, "initiate TCP query %s", + sq->status==serviced_query_TCP_EDNS?"EDNS":""); + serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS); + sq->last_sent_time = *sq->outnet->now_tv; + sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT, + serviced_tcp_callback, sq); + if(!sq->pending) { + /* delete from tree so that a retry by above layer does not + * clash with this entry */ + log_err("serviced_tcp_initiate: failed to send tcp query"); + serviced_callbacks(sq, NETEVENT_CLOSED, NULL, NULL); + } +} + +/** Send serviced query over TCP return false on initial failure */ +static int +serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff) +{ + int vs, rtt; + uint8_t edns_lame_known; + if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone, + sq->zonelen, *sq->outnet->now_secs, &vs, &edns_lame_known, + &rtt)) + return 0; + if(vs != -1) + sq->status = serviced_query_TCP_EDNS; + else sq->status = serviced_query_TCP; + serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS); + sq->last_sent_time = *sq->outnet->now_tv; + sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT, + serviced_tcp_callback, sq); + return sq->pending != NULL; +} + +int +serviced_udp_callback(struct comm_point* c, void* arg, int error, + struct comm_reply* rep) +{ + struct serviced_query* sq = (struct serviced_query*)arg; + struct outside_network* outnet = sq->outnet; + struct timeval now = *sq->outnet->now_tv; + int fallback_tcp = 0; + + sq->pending = NULL; /* removed after callback */ + if(error == NETEVENT_TIMEOUT) { + int rto = 0; + if(sq->status == serviced_query_PROBE_EDNS) { + /* non-EDNS probe failed; we do not know its status, + * keep trying with EDNS, timeout may not be caused + * by EDNS. 
*/ + sq->status = serviced_query_UDP_EDNS; + } + if(sq->status == serviced_query_UDP_EDNS && sq->last_rtt < 5000) { + /* fallback to 1480/1280 */ + sq->status = serviced_query_UDP_EDNS_FRAG; + log_name_addr(VERB_ALGO, "try edns1xx0", sq->qbuf+10, + &sq->addr, sq->addrlen); + if(!serviced_udp_send(sq, c->buffer)) { + serviced_callbacks(sq, NETEVENT_CLOSED, c, rep); + } + return 0; + } + if(sq->status == serviced_query_UDP_EDNS_FRAG) { + /* fragmentation size did not fix it */ + sq->status = serviced_query_UDP_EDNS; + } + sq->retry++; + if(!(rto=infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, + sq->zone, sq->zonelen, sq->qtype, -1, sq->last_rtt, + (time_t)now.tv_sec))) + log_err("out of memory in UDP exponential backoff"); + if(sq->retry < OUTBOUND_UDP_RETRY) { + log_name_addr(VERB_ALGO, "retry query", sq->qbuf+10, + &sq->addr, sq->addrlen); + if(!serviced_udp_send(sq, c->buffer)) { + serviced_callbacks(sq, NETEVENT_CLOSED, c, rep); + } + return 0; + } + if(rto >= RTT_MAX_TIMEOUT) { + fallback_tcp = 1; + /* UDP does not work, fallback to TCP below */ + } else { + serviced_callbacks(sq, NETEVENT_TIMEOUT, c, rep); + return 0; + } + } else if(error != NETEVENT_NOERROR) { + /* udp returns error (due to no ID or interface available) */ + serviced_callbacks(sq, error, c, rep); + return 0; + } +#ifdef USE_DNSTAP + if(outnet->dtenv && + (outnet->dtenv->log_resolver_response_messages || + outnet->dtenv->log_forwarder_response_messages)) + dt_msg_send_outside_response(outnet->dtenv, &sq->addr, c->type, + sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen, + &sq->last_sent_time, sq->outnet->now_tv, c->buffer); +#endif + if(!fallback_tcp) { + if( (sq->status == serviced_query_UDP_EDNS + ||sq->status == serviced_query_UDP_EDNS_FRAG) + && (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) + == LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE( + sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOTIMPL)) { + /* try to get an answer by falling back without EDNS */ + verbose(VERB_ALGO, "serviced 
query: attempt without EDNS"); + sq->status = serviced_query_UDP_EDNS_fallback; + sq->retry = 0; + if(!serviced_udp_send(sq, c->buffer)) { + serviced_callbacks(sq, NETEVENT_CLOSED, c, rep); + } + return 0; + } else if(sq->status == serviced_query_PROBE_EDNS) { + /* probe without EDNS succeeds, so we conclude that this + * host likely has EDNS packets dropped */ + log_addr(VERB_DETAIL, "timeouts, concluded that connection to " + "host drops EDNS packets", &sq->addr, sq->addrlen); + /* only store noEDNS in cache if domain is noDNSSEC */ + if(!sq->want_dnssec) + if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen, + sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) { + log_err("Out of memory caching no edns for host"); + } + sq->status = serviced_query_UDP; + } else if(sq->status == serviced_query_UDP_EDNS && + !sq->edns_lame_known) { + /* now we know that edns queries received answers store that */ + log_addr(VERB_ALGO, "serviced query: EDNS works for", + &sq->addr, sq->addrlen); + if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen, + sq->zone, sq->zonelen, 0, (time_t)now.tv_sec)) { + log_err("Out of memory caching edns works"); + } + sq->edns_lame_known = 1; + } else if(sq->status == serviced_query_UDP_EDNS_fallback && + !sq->edns_lame_known && (LDNS_RCODE_WIRE( + sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOERROR || + LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == + LDNS_RCODE_NXDOMAIN || LDNS_RCODE_WIRE(sldns_buffer_begin( + c->buffer)) == LDNS_RCODE_YXDOMAIN)) { + /* the fallback produced a result that looks promising, note + * that this server should be approached without EDNS */ + /* only store noEDNS in cache if domain is noDNSSEC */ + if(!sq->want_dnssec) { + log_addr(VERB_ALGO, "serviced query: EDNS fails for", + &sq->addr, sq->addrlen); + if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen, + sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) { + log_err("Out of memory caching no edns for host"); + } + } else { + log_addr(VERB_ALGO, 
"serviced query: EDNS fails, but " + "not stored because need DNSSEC for", &sq->addr, + sq->addrlen); + } + sq->status = serviced_query_UDP; + } + if(now.tv_sec > sq->last_sent_time.tv_sec || + (now.tv_sec == sq->last_sent_time.tv_sec && + now.tv_usec > sq->last_sent_time.tv_usec)) { + /* convert from microseconds to milliseconds */ + int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000 + + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000; + verbose(VERB_ALGO, "measured roundtrip at %d msec", roundtime); + log_assert(roundtime >= 0); + /* in case the system hibernated, do not enter a huge value, + * above this value gives trouble with server selection */ + if(roundtime < 60000) { + if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, + sq->zone, sq->zonelen, sq->qtype, roundtime, + sq->last_rtt, (time_t)now.tv_sec)) + log_err("out of memory noting rtt."); + } + } + } /* end of if_!fallback_tcp */ + /* perform TC flag check and TCP fallback after updating our + * cache entries for EDNS status and RTT times */ + if(LDNS_TC_WIRE(sldns_buffer_begin(c->buffer)) || fallback_tcp) { + /* fallback to TCP */ + /* this discards partial UDP contents */ + if(sq->status == serviced_query_UDP_EDNS || + sq->status == serviced_query_UDP_EDNS_FRAG || + sq->status == serviced_query_UDP_EDNS_fallback) + /* if we have unfinished EDNS_fallback, start again */ + sq->status = serviced_query_TCP_EDNS; + else sq->status = serviced_query_TCP; + serviced_tcp_initiate(sq, c->buffer); + return 0; + } + /* yay! 
an answer */ + serviced_callbacks(sq, error, c, rep); + return 0; +} + +struct serviced_query* +outnet_serviced_query(struct outside_network* outnet, + uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, + uint16_t flags, int dnssec, int want_dnssec, int nocaps, + int tcp_upstream, int ssl_upstream, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* zone, size_t zonelen, + comm_point_callback_t* callback, void* callback_arg, + sldns_buffer* buff) +{ + struct serviced_query* sq; + struct service_callback* cb; + serviced_gen_query(buff, qname, qnamelen, qtype, qclass, flags); + sq = lookup_serviced(outnet, buff, dnssec, addr, addrlen); + /* duplicate entries are included in the callback list, because + * there is a counterpart registration by our caller that needs to + * be doubly-removed (with callbacks perhaps). */ + if(!(cb = (struct service_callback*)malloc(sizeof(*cb)))) + return NULL; + if(!sq) { + /* make new serviced query entry */ + sq = serviced_create(outnet, buff, dnssec, want_dnssec, nocaps, + tcp_upstream, ssl_upstream, addr, addrlen, zone, + zonelen, (int)qtype); + if(!sq) { + free(cb); + return NULL; + } + /* perform first network action */ + if(outnet->do_udp && !(tcp_upstream || ssl_upstream)) { + if(!serviced_udp_send(sq, buff)) { + (void)rbtree_delete(outnet->serviced, sq); + free(sq->qbuf); + free(sq->zone); + free(sq); + free(cb); + return NULL; + } + } else { + if(!serviced_tcp_send(sq, buff)) { + (void)rbtree_delete(outnet->serviced, sq); + free(sq->qbuf); + free(sq->zone); + free(sq); + free(cb); + return NULL; + } + } + } + /* add callback to list of callbacks */ + cb->cb = callback; + cb->cb_arg = callback_arg; + cb->next = sq->cblist; + sq->cblist = cb; + return sq; +} + +/** remove callback from list */ +static void +callback_list_remove(struct serviced_query* sq, void* cb_arg) +{ + struct service_callback** pp = &sq->cblist; + while(*pp) { + if((*pp)->cb_arg == cb_arg) { + struct service_callback* del = *pp; + *pp 
= del->next; + free(del); + return; + } + pp = &(*pp)->next; + } +} + +void outnet_serviced_query_stop(struct serviced_query* sq, void* cb_arg) +{ + if(!sq) + return; + callback_list_remove(sq, cb_arg); + /* if callbacks() routine scheduled deletion, let it do that */ + if(!sq->cblist && !sq->to_be_deleted) { +#ifdef UNBOUND_DEBUG + rbnode_t* rem = +#else + (void) +#endif + rbtree_delete(sq->outnet->serviced, sq); + log_assert(rem); /* should be present */ + serviced_delete(sq); + } +} + +/** get memory used by waiting tcp entry (in use or not) */ +static size_t +waiting_tcp_get_mem(struct waiting_tcp* w) +{ + size_t s; + if(!w) return 0; + s = sizeof(*w) + w->pkt_len; + if(w->timer) + s += comm_timer_get_mem(w->timer); + return s; +} + +/** get memory used by port if */ +static size_t +if_get_mem(struct port_if* pif) +{ + size_t s; + int i; + s = sizeof(*pif) + sizeof(int)*pif->avail_total + + sizeof(struct port_comm*)*pif->maxout; + for(i=0; i<pif->inuse; i++) + s += sizeof(*pif->out[i]) + + comm_point_get_mem(pif->out[i]->cp); + return s; +} + +/** get memory used by waiting udp */ +static size_t +waiting_udp_get_mem(struct pending* w) +{ + size_t s; + s = sizeof(*w) + comm_timer_get_mem(w->timer) + w->pkt_len; + return s; +} + +size_t outnet_get_mem(struct outside_network* outnet) +{ + size_t i; + int k; + struct waiting_tcp* w; + struct pending* u; + struct serviced_query* sq; + struct service_callback* sb; + struct port_comm* pc; + size_t s = sizeof(*outnet) + sizeof(*outnet->base) + + sizeof(*outnet->udp_buff) + + sldns_buffer_capacity(outnet->udp_buff); + /* second buffer is not ours */ + for(pc = outnet->unused_fds; pc; pc = pc->next) { + s += sizeof(*pc) + comm_point_get_mem(pc->cp); + } + for(k=0; k<outnet->num_ip4; k++) + s += if_get_mem(&outnet->ip4_ifs[k]); + for(k=0; k<outnet->num_ip6; k++) + s += if_get_mem(&outnet->ip6_ifs[k]); + for(u=outnet->udp_wait_first; u; u=u->next_waiting) + s += waiting_udp_get_mem(u); + + s += sizeof(struct 
pending_tcp*)*outnet->num_tcp; + for(i=0; i<outnet->num_tcp; i++) { + s += sizeof(struct pending_tcp); + s += comm_point_get_mem(outnet->tcp_conns[i]->c); + if(outnet->tcp_conns[i]->query) + s += waiting_tcp_get_mem(outnet->tcp_conns[i]->query); + } + for(w=outnet->tcp_wait_first; w; w = w->next_waiting) + s += waiting_tcp_get_mem(w); + s += sizeof(*outnet->pending); + s += (sizeof(struct pending) + comm_timer_get_mem(NULL)) * + outnet->pending->count; + s += sizeof(*outnet->serviced); + s += outnet->svcd_overhead; + RBTREE_FOR(sq, struct serviced_query*, outnet->serviced) { + s += sizeof(*sq) + sq->qbuflen; + for(sb = sq->cblist; sb; sb = sb->next) + s += sizeof(*sb); + } + return s; +} + +size_t +serviced_get_mem(struct serviced_query* sq) +{ + struct service_callback* sb; + size_t s; + s = sizeof(*sq) + sq->qbuflen; + for(sb = sq->cblist; sb; sb = sb->next) + s += sizeof(*sb); + if(sq->status == serviced_query_UDP_EDNS || + sq->status == serviced_query_UDP || + sq->status == serviced_query_PROBE_EDNS || + sq->status == serviced_query_UDP_EDNS_FRAG || + sq->status == serviced_query_UDP_EDNS_fallback) { + s += sizeof(struct pending); + s += comm_timer_get_mem(NULL); + } else { + /* does not have size of the pkt pointer */ + /* always has a timer except on malloc failures */ + + /* these sizes are part of the main outside network mem */ + /* + s += sizeof(struct waiting_tcp); + s += comm_timer_get_mem(NULL); + */ + } + return s; +} + diff --git a/external/unbound/services/outside_network.h b/external/unbound/services/outside_network.h new file mode 100644 index 000000000..9959676d3 --- /dev/null +++ b/external/unbound/services/outside_network.h @@ -0,0 +1,554 @@ +/* + * services/outside_network.h - listen to answers from the network + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * This file has functions to send queries to authoritative servers, + * and wait for the pending answer, with timeouts. 
+ */ + +#ifndef OUTSIDE_NETWORK_H +#define OUTSIDE_NETWORK_H + +#include "util/rbtree.h" +#include "util/netevent.h" +#include "dnstap/dnstap_config.h" +struct pending; +struct pending_timeout; +struct ub_randstate; +struct pending_tcp; +struct waiting_tcp; +struct waiting_udp; +struct infra_cache; +struct port_comm; +struct port_if; +struct sldns_buffer; +struct serviced_query; +struct dt_env; + +/** + * Send queries to outside servers and wait for answers from servers. + * Contains answer-listen sockets. + */ +struct outside_network { + /** Base for select calls */ + struct comm_base* base; + /** pointer to time in seconds */ + time_t* now_secs; + /** pointer to time in microseconds */ + struct timeval* now_tv; + + /** buffer shared by UDP connections, since there is only one + datagram at any time. */ + struct sldns_buffer* udp_buff; + /** serviced_callbacks malloc overhead when processing multiple + * identical serviced queries to the same server. */ + size_t svcd_overhead; + /** use x20 bits to encode additional ID random bits */ + int use_caps_for_id; + /** outside network wants to quit. Stop queued msgs from sent. */ + int want_to_quit; + + /** number of unwanted replies received (for statistics) */ + size_t unwanted_replies; + /** cumulative total of unwanted replies (for defense) */ + size_t unwanted_total; + /** threshold when to take defensive action. If 0 then never. */ + size_t unwanted_threshold; + /** what action to take, called when defensive action is needed */ + void (*unwanted_action)(void*); + /** user param for action */ + void* unwanted_param; + + /** linked list of available commpoints, unused file descriptors, + * for use as outgoing UDP ports. cp.fd=-1 in them. 
*/ + struct port_comm* unused_fds; + /** if udp is done */ + int do_udp; + /** if udp is delay-closed (delayed answers do not meet closed port)*/ + int delayclose; + /** timeout for delayclose */ + struct timeval delay_tv; + + /** array of outgoing IP4 interfaces */ + struct port_if* ip4_ifs; + /** number of outgoing IP4 interfaces */ + int num_ip4; + + /** array of outgoing IP6 interfaces */ + struct port_if* ip6_ifs; + /** number of outgoing IP6 interfaces */ + int num_ip6; + + /** pending udp queries waiting to be sent out, waiting for fd */ + struct pending* udp_wait_first; + /** last pending udp query in list */ + struct pending* udp_wait_last; + + /** pending udp answers. sorted by id, addr */ + rbtree_t* pending; + /** serviced queries, sorted by qbuf, addr, dnssec */ + rbtree_t* serviced; + /** host cache, pointer but not owned by outnet. */ + struct infra_cache* infra; + /** where to get random numbers */ + struct ub_randstate* rnd; + /** ssl context to create ssl wrapped TCP with DNS connections */ + void* sslctx; +#ifdef USE_DNSTAP + /** dnstap environment */ + struct dt_env* dtenv; +#endif + + /** + * Array of tcp pending used for outgoing TCP connections. + * Each can be used to establish a TCP connection with a server. + * The file descriptors are -1 if they are free, and need to be + * opened for the tcp connection. Can be used for ip4 and ip6. + */ + struct pending_tcp **tcp_conns; + /** number of tcp communication points. */ + size_t num_tcp; + /** number of tcp communication points in use. */ + size_t num_tcp_outgoing; + /** list of tcp comm points that are free for use */ + struct pending_tcp* tcp_free; + /** list of tcp queries waiting for a buffer */ + struct waiting_tcp* tcp_wait_first; + /** last of waiting query list */ + struct waiting_tcp* tcp_wait_last; +}; + +/** + * Outgoing interface. Ports available and currently used are tracked + * per interface + */ +struct port_if { + /** address ready to allocate new socket (except port no). 
*/ + struct sockaddr_storage addr; + /** length of addr field */ + socklen_t addrlen; + + /** the available ports array. These are unused. + * Only the first total-inuse part is filled. */ + int* avail_ports; + /** the total number of available ports (size of the array) */ + int avail_total; + + /** array of the commpoints currently in use. + * allocated for max number of fds, first part in use. */ + struct port_comm** out; + /** max number of fds, size of out array */ + int maxout; + /** number of commpoints (and thus also ports) in use */ + int inuse; +}; + +/** + * Outgoing commpoint for UDP port. + */ +struct port_comm { + /** next in free list */ + struct port_comm* next; + /** which port number (when in use) */ + int number; + /** interface it is used in */ + struct port_if* pif; + /** index in the out array of the interface */ + int index; + /** number of outstanding queries on this port */ + int num_outstanding; + /** UDP commpoint, fd=-1 if not in use */ + struct comm_point* cp; +}; + +/** + * A query that has an answer pending for it. + */ +struct pending { + /** redblacktree entry, key is the pending struct(id, addr). */ + rbnode_t node; + /** the ID for the query. int so that a value out of range can + * be used to signify a pending that is for certain not present in + * the rbtree. (and for which deletion is safe). */ + unsigned int id; + /** remote address. */ + struct sockaddr_storage addr; + /** length of addr field in use. */ + socklen_t addrlen; + /** comm point it was sent on (and reply must come back on). */ + struct port_comm* pc; + /** timeout event */ + struct comm_timer* timer; + /** callback for the timeout, error or reply to the message */ + comm_point_callback_t* cb; + /** callback user argument */ + void* cb_arg; + /** the outside network it is part of */ + struct outside_network* outnet; + /** the corresponding serviced_query */ + struct serviced_query* sq; + + /*---- filled if udp pending is waiting -----*/ + /** next in waiting list. 
*/ + struct pending* next_waiting; + /** timeout in msec */ + int timeout; + /** The query itself, the query packet to send. */ + uint8_t* pkt; + /** length of query packet. */ + size_t pkt_len; +}; + +/** + * Pending TCP query to server. + */ +struct pending_tcp { + /** next in list of free tcp comm points, or NULL. */ + struct pending_tcp* next_free; + /** the ID for the query; checked in reply */ + uint16_t id; + /** tcp comm point it was sent on (and reply must come back on). */ + struct comm_point* c; + /** the query being serviced, NULL if the pending_tcp is unused. */ + struct waiting_tcp* query; +}; + +/** + * Query waiting for TCP buffer. + */ +struct waiting_tcp { + /** + * next in waiting list. + * if pkt==0, this points to the pending_tcp structure. + */ + struct waiting_tcp* next_waiting; + /** timeout event; timer keeps running whether the query is + * waiting for a buffer or the tcp reply is pending */ + struct comm_timer* timer; + /** the outside network it is part of */ + struct outside_network* outnet; + /** remote address. */ + struct sockaddr_storage addr; + /** length of addr field in use. */ + socklen_t addrlen; + /** + * The query itself, the query packet to send. + * allocated after the waiting_tcp structure. + * set to NULL when the query is serviced and it part of pending_tcp. + * if this is NULL, the next_waiting points to the pending_tcp. + */ + uint8_t* pkt; + /** length of query packet. */ + size_t pkt_len; + /** callback for the timeout, error or reply to the message */ + comm_point_callback_t* cb; + /** callback user argument */ + void* cb_arg; + /** if it uses ssl upstream */ + int ssl_upstream; +}; + +/** + * Callback to party interested in serviced query results. 
+ */ +struct service_callback { + /** next in callback list */ + struct service_callback* next; + /** callback function */ + comm_point_callback_t* cb; + /** user argument for callback function */ + void* cb_arg; +}; + +/** fallback size for fragmentation for EDNS in IPv4 */ +#define EDNS_FRAG_SIZE_IP4 1472 +/** fallback size for EDNS in IPv6, fits one fragment with ip6-tunnel-ids */ +#define EDNS_FRAG_SIZE_IP6 1232 + +/** + * Query service record. + * Contains the query and its destination. UDP, TCP and EDNS are all tried, + * complete with retries and timeouts. A number of interested parties can + * receive a callback. + */ +struct serviced_query { + /** The rbtree node, key is this record */ + rbnode_t node; + /** The query that needs to be answered. Starts with flags u16, + * then qdcount, ..., including qname, qtype, qclass. Does not include + * the EDNS record. */ + uint8_t* qbuf; + /** length of qbuf. */ + size_t qbuflen; + /** If an EDNS section is included, the DO/CD bit will be turned on. */ + int dnssec; + /** We want signatures, or else the answer is likely useless */ + int want_dnssec; + /** ignore capsforid */ + int nocaps; + /** tcp_upstream: use TCP for upstream; ssl_upstream: use SSL for upstream */ + int tcp_upstream, ssl_upstream; + /** where to send it */ + struct sockaddr_storage addr; + /** length of addr field in use. 
*/ + socklen_t addrlen; + /** zone name, uncompressed domain name in wireformat */ + uint8_t* zone; + /** length of zone name */ + size_t zonelen; + /** qtype of the query */ + int qtype; + /** current status */ + enum serviced_query_status { + /** initial status */ + serviced_initial, + /** UDP with EDNS sent */ + serviced_query_UDP_EDNS, + /** UDP without EDNS sent */ + serviced_query_UDP, + /** TCP with EDNS sent */ + serviced_query_TCP_EDNS, + /** TCP without EDNS sent */ + serviced_query_TCP, + /** probe to test EDNS lameness (EDNS is dropped) */ + serviced_query_PROBE_EDNS, + /** probe to test noEDNS0 (EDNS gives FORMERR or NOTIMP) */ + serviced_query_UDP_EDNS_fallback, + /** probe to test TCP noEDNS0 (EDNS gives FORMERR or NOTIMP) */ + serviced_query_TCP_EDNS_fallback, + /** send UDP query with a reduced EDNS size; originally noted as EDNS1480 (or 1280), presumably EDNS_FRAG_SIZE_IP4 (or EDNS_FRAG_SIZE_IP6) now - TODO confirm */ + serviced_query_UDP_EDNS_FRAG + } + /** variable with current status */ + status; + /** true if serviced_query is scheduled for deletion already */ + int to_be_deleted; + /** number of UDP retries */ + int retry; + /** time last UDP was sent */ + struct timeval last_sent_time; + /** rtt of last (UDP) message */ + int last_rtt; + /** do we know edns probe status already, for UDP_EDNS queries */ + int edns_lame_known; + /** outside network this is part of */ + struct outside_network* outnet; + /** list of interested parties that need callback on results. */ + struct service_callback* cblist; + /** the UDP or TCP query that is pending; see status for which one */ + void* pending; +}; + +/** + * Create outside_network structure with N udp ports. + * @param base: the communication base to use for event handling. + * @param bufsize: size for network buffers. + * @param num_ports: number of udp ports to open per interface. + * @param ifs: interface names (or NULL for default interface). + * These interfaces must be able to access all authoritative servers. + * @param num_ifs: number of names in array ifs. + * @param do_ip4: service IP4. + * @param do_ip6: service IP6. 
+ * @param num_tcp: number of outgoing tcp buffers to preallocate. + * @param infra: pointer to infra cache used for serviced queries. + * @param rnd: stored to create random numbers for serviced queries. + * @param use_caps_for_id: enable to use 0x20 bits to encode id randomness. + * @param availports: array of available ports. + * @param numavailports: number of available ports in array. + * @param unwanted_threshold: when to take defensive action. + * @param unwanted_action: the action to take. + * @param unwanted_param: user parameter to action. + * @param do_udp: if udp is done. + * @param sslctx: context to create outgoing connections with (if enabled). + * @param delayclose: if not 0, udp sockets are delayed before timeout closure. + * msec to wait on timed-out udp sockets. + * @param dtenv: environment to send dnstap events with (if enabled). + * @return: the new structure (with no pending answers) or NULL on error. + */ +struct outside_network* outside_network_create(struct comm_base* base, + size_t bufsize, size_t num_ports, char** ifs, int num_ifs, + int do_ip4, int do_ip6, size_t num_tcp, struct infra_cache* infra, + struct ub_randstate* rnd, int use_caps_for_id, int* availports, + int numavailports, size_t unwanted_threshold, + void (*unwanted_action)(void*), void* unwanted_param, int do_udp, + void* sslctx, int delayclose, struct dt_env *dtenv); + +/** + * Delete outside_network structure. + * @param outnet: object to delete. + */ +void outside_network_delete(struct outside_network* outnet); + +/** + * Prepare for quit. Sends no more queries, even if queued up. + * @param outnet: object to prepare for removal + */ +void outside_network_quit_prepare(struct outside_network* outnet); + +/** + * Send UDP query, create pending answer. + * Changes the ID for the query to be random and unique for that destination. + * @param sq: serviced query. + * @param packet: wireformat query to send to destination. + * @param timeout: in milliseconds from now. 
+ * @param callback: function to call on error, timeout or reply. + * @param callback_arg: user argument for callback function. + * @return: NULL on error for malloc or socket. Else the pending query object. + */ +struct pending* pending_udp_query(struct serviced_query* sq, + struct sldns_buffer* packet, int timeout, comm_point_callback_t* callback, + void* callback_arg); + +/** + * Send TCP query. May wait for TCP buffer. Selects ID to be random, and + * checks id. + * @param sq: serviced query. + * @param packet: wireformat query to send to destination. copied from. + * @param timeout: in seconds from now. + * Timer starts running now. Timer may expire if all buffers are used, + * without any query having been sent to the server yet. + * NOTE(review): pending_udp_query documents its timeout in milliseconds; + * confirm the unit used here against the implementation. + * @param callback: function to call on error, timeout or reply. + * @param callback_arg: user argument for callback function. + * @return: NULL on error for malloc or socket. Else the pending TCP object. + */ +struct waiting_tcp* pending_tcp_query(struct serviced_query* sq, + struct sldns_buffer* packet, int timeout, comm_point_callback_t* callback, + void* callback_arg); + +/** + * Delete pending answer. + * @param outnet: outside network the pending query is part of. + * Internal feature: if outnet is NULL, p is not unlinked from rbtree. + * @param p: the pending query to delete. + */ +void pending_delete(struct outside_network* outnet, struct pending* p); + +/** + * Perform a serviced query to the authoritative servers. + * Duplicate efforts are detected, and EDNS, TCP and UDP retry is performed. + * @param outnet: outside network, with rbtree of serviced queries. + * @param qname: what qname to query. + * @param qnamelen: length of qname in octets including 0 root label. + * @param qtype: rrset type to query (host format) + * @param qclass: query class. (host format) + * @param flags: flags u16 (host format), includes opcode, CD bit. + * @param dnssec: if set, DO bit is set in EDNS queries. 
+ * If the value includes BIT_CD, the CD bit is set in EDNS queries. + * If the value includes BIT_DO, the DO bit is set in EDNS queries. + * @param want_dnssec: signatures are needed, without EDNS the answer is + * likely to be useless. + * @param nocaps: ignore use_caps_for_id and use unperturbed qname. + * @param tcp_upstream: use TCP for upstream queries. + * @param ssl_upstream: use SSL for upstream queries. + * @param callback: callback function. + * @param callback_arg: user argument to callback function. + * @param addr: to which server to send the query. + * @param addrlen: length of addr. + * @param zone: name of the zone of the delegation point. wireformat dname. + * This is the delegation point name for which the server is deemed + * authoritative. + * @param zonelen: length of zone. + * @param buff: scratch buffer to create query contents in. Empty on exit. + * @return NULL on error, or a pointer to the serviced query that is used to + * answer. This serviced query may be shared with other callbacks as well. + */ +struct serviced_query* outnet_serviced_query(struct outside_network* outnet, + uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, + uint16_t flags, int dnssec, int want_dnssec, int nocaps, + int tcp_upstream, int ssl_upstream, struct sockaddr_storage* addr, + socklen_t addrlen, uint8_t* zone, size_t zonelen, + comm_point_callback_t* callback, void* callback_arg, + struct sldns_buffer* buff); + +/** + * Remove service query callback. + * If that leads to zero callbacks, the query is completely cancelled. + * @param sq: serviced query to adjust. + * @param cb_arg: callback argument of callback that needs removal. + * same as the callback_arg to outnet_serviced_query(). + */ +void outnet_serviced_query_stop(struct serviced_query* sq, void* cb_arg); + +/** + * Get memory size in use by outside network. + * Counts buffers and outstanding query (serviced queries) malloced data. + * @param outnet: outside network structure. 
+ * @return size in bytes. + */ +size_t outnet_get_mem(struct outside_network* outnet); + +/** + * Get memory size in use by serviced query while it is servicing callbacks. + * This takes into account the pre-deleted status of it; it will be deleted + * when the callbacks are done. + * @param sq: serviced query. + * @return size in bytes. + */ +size_t serviced_get_mem(struct serviced_query* sq); + +/** callback for incoming udp answers from the network */ +int outnet_udp_cb(struct comm_point* c, void* arg, int error, + struct comm_reply *reply_info); + +/** callback for pending tcp connections */ +int outnet_tcp_cb(struct comm_point* c, void* arg, int error, + struct comm_reply *reply_info); + +/** callback for udp timeout */ +void pending_udp_timer_cb(void *arg); + +/** callback for udp delay for timeout */ +void pending_udp_timer_delay_cb(void *arg); + +/** callback for outgoing TCP timer event */ +void outnet_tcptimer(void* arg); + +/** callback for serviced query UDP answers */ +int serviced_udp_callback(struct comm_point* c, void* arg, int error, + struct comm_reply* rep); + +/** TCP reply or error callback for serviced queries */ +int serviced_tcp_callback(struct comm_point* c, void* arg, int error, + struct comm_reply* rep); + +/** compare function of pending rbtree */ +int pending_cmp(const void* key1, const void* key2); + +/** compare function of serviced query rbtree */ +int serviced_cmp(const void* key1, const void* key2); + +#endif /* OUTSIDE_NETWORK_H */ |