You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
495 lines
16 KiB
495 lines
16 KiB
From a799ca0c6314ad73a97bc6c89382d2712a9c0b0e Mon Sep 17 00:00:00 2001
|
|
From: Simon Kelley <simon@thekelleys.org.uk>
|
|
Date: Thu, 18 Oct 2018 19:35:29 +0100
|
|
Subject: [PATCH 01/32] Improve cache behaviour for TCP connections.
|
|
|
|
For ease of implementation, dnsmasq has always forked a new process to
|
|
handle each incoming TCP connection. A side-effect of this is that any
|
|
DNS queries answered from TCP connections are not cached: when TCP
|
|
connections were rare, this was not a problem. With the coming of
|
|
DNSSEC, it's now the case that some DNSSEC queries have answers which
|
|
spill to TCP, and if, for instance, this applies to the keys for the
|
|
root then those never get cached, and performance is very bad. This
|
|
fix passes cache entries back from the TCP child process to the main
|
|
server process, and fixes the problem.
|
|
|
|
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
|
---
|
|
CHANGELOG | 14 ++++
|
|
src/blockdata.c | 37 ++++++++-
|
|
src/cache.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++--
|
|
src/dnsmasq.c | 58 ++++++++++++--
|
|
src/dnsmasq.h | 5 ++
|
|
5 files changed, 291 insertions(+), 19 deletions(-)
|
|
|
|
--- a/CHANGELOG
|
|
+++ b/CHANGELOG
|
|
@@ -1,3 +1,17 @@
|
|
+version 2.81
|
|
+ Improve cache behaviour for TCP connections. For ease of
|
|
+ implementation, dnsmasq has always forked a new process to handle
|
|
+ each incoming TCP connection. A side-effect of this is that
|
|
+ any DNS queries answered from TCP connections are not cached:
|
|
+ when TCP connections were rare, this was not a problem.
|
|
+ With the coming of DNSSEC, it's now the case that some
|
|
+ DNSSEC queries have answers which spill to TCP, and if,
|
|
+ for instance, this applies to the keys for the root then
|
|
+ those never get cached, and performance is very bad.
|
|
+ This fix passes cache entries back from the TCP child process to
|
|
+ the main server process, and fixes the problem.
|
|
+
|
|
+
|
|
version 2.80
|
|
Add support for RFC 4039 DHCP rapid commit. Thanks to Ashram Method
|
|
for the initial patch and motivation.
|
|
--- a/src/blockdata.c
|
|
+++ b/src/blockdata.c
|
|
@@ -61,7 +61,7 @@ void blockdata_report(void)
|
|
blockdata_alloced * sizeof(struct blockdata));
|
|
}
|
|
|
|
-struct blockdata *blockdata_alloc(char *data, size_t len)
|
|
+static struct blockdata *blockdata_alloc_real(int fd, char *data, size_t len)
|
|
{
|
|
struct blockdata *block, *ret = NULL;
|
|
struct blockdata **prev = &ret;
|
|
@@ -89,8 +89,17 @@ struct blockdata *blockdata_alloc(char *
|
|
blockdata_hwm = blockdata_count;
|
|
|
|
blen = len > KEYBLOCK_LEN ? KEYBLOCK_LEN : len;
|
|
- memcpy(block->key, data, blen);
|
|
- data += blen;
|
|
+ if (data)
|
|
+ {
|
|
+ memcpy(block->key, data, blen);
|
|
+ data += blen;
|
|
+ }
|
|
+ else if (!read_write(fd, block->key, blen, 1))
|
|
+ {
|
|
+ /* failed read free partial chain */
|
|
+ blockdata_free(ret);
|
|
+ return NULL;
|
|
+ }
|
|
len -= blen;
|
|
*prev = block;
|
|
prev = &block->next;
|
|
@@ -100,6 +109,10 @@ struct blockdata *blockdata_alloc(char *
|
|
return ret;
|
|
}
|
|
|
|
+struct blockdata *blockdata_alloc(char *data, size_t len)
|
|
+{
|
|
+ return blockdata_alloc_real(0, data, len);
|
|
+}
|
|
|
|
void blockdata_free(struct blockdata *blocks)
|
|
{
|
|
@@ -148,5 +161,21 @@ void *blockdata_retrieve(struct blockdat
|
|
|
|
return data;
|
|
}
|
|
-
|
|
+
|
|
+
|
|
+void blockdata_write(struct blockdata *block, size_t len, int fd)
|
|
+{
|
|
+ for (; len > 0 && block; block = block->next)
|
|
+ {
|
|
+ size_t blen = len > KEYBLOCK_LEN ? KEYBLOCK_LEN : len;
|
|
+ read_write(fd, block->key, blen, 0);
|
|
+ len -= blen;
|
|
+ }
|
|
+}
|
|
+
|
|
+struct blockdata *blockdata_read(int fd, size_t len)
|
|
+{
|
|
+ return blockdata_alloc_real(fd, NULL, len);
|
|
+}
|
|
+
|
|
#endif
|
|
--- a/src/cache.c
|
|
+++ b/src/cache.c
|
|
@@ -26,6 +26,8 @@ static union bigname *big_free = NULL;
|
|
static int bignames_left, hash_size;
|
|
|
|
static void make_non_terminals(struct crec *source);
|
|
+static struct crec *really_insert(char *name, struct all_addr *addr,
|
|
+ time_t now, unsigned long ttl, unsigned short flags);
|
|
|
|
/* type->string mapping: this is also used by the name-hash function as a mixing table. */
|
|
static const struct {
|
|
@@ -464,16 +466,10 @@ void cache_start_insert(void)
|
|
new_chain = NULL;
|
|
insert_error = 0;
|
|
}
|
|
-
|
|
+
|
|
struct crec *cache_insert(char *name, struct all_addr *addr,
|
|
time_t now, unsigned long ttl, unsigned short flags)
|
|
{
|
|
- struct crec *new, *target_crec = NULL;
|
|
- union bigname *big_name = NULL;
|
|
- int freed_all = flags & F_REVERSE;
|
|
- int free_avail = 0;
|
|
- unsigned int target_uid;
|
|
-
|
|
/* Don't log DNSSEC records here, done elsewhere */
|
|
if (flags & (F_IPV4 | F_IPV6 | F_CNAME))
|
|
{
|
|
@@ -484,7 +480,20 @@ struct crec *cache_insert(char *name, st
|
|
if (daemon->min_cache_ttl != 0 && daemon->min_cache_ttl > ttl)
|
|
ttl = daemon->min_cache_ttl;
|
|
}
|
|
+
|
|
+ return really_insert(name, addr, now, ttl, flags);
|
|
+}
|
|
|
|
+
|
|
+static struct crec *really_insert(char *name, struct all_addr *addr,
|
|
+ time_t now, unsigned long ttl, unsigned short flags)
|
|
+{
|
|
+ struct crec *new, *target_crec = NULL;
|
|
+ union bigname *big_name = NULL;
|
|
+ int freed_all = flags & F_REVERSE;
|
|
+ int free_avail = 0;
|
|
+ unsigned int target_uid;
|
|
+
|
|
/* if previous insertion failed give up now. */
|
|
if (insert_error)
|
|
return NULL;
|
|
@@ -645,12 +654,185 @@ void cache_end_insert(void)
|
|
cache_hash(new_chain);
|
|
cache_link(new_chain);
|
|
daemon->metrics[METRIC_DNS_CACHE_INSERTED]++;
|
|
+
|
|
+ /* If we're a child process, send this cache entry up the pipe to the master.
|
|
+ The marshalling process is rather nasty. */
|
|
+ if (daemon->pipe_to_parent != -1)
|
|
+ {
|
|
+ char *name = cache_get_name(new_chain);
|
|
+ ssize_t m = strlen(name);
|
|
+ unsigned short flags = new_chain->flags;
|
|
+#ifdef HAVE_DNSSEC
|
|
+ u16 class = new_chain->uid;
|
|
+#endif
|
|
+
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)name, m, 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->ttd, sizeof(new_chain->ttd), 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&flags, sizeof(flags), 0);
|
|
+
|
|
+ if (flags & (F_IPV4 | F_IPV6))
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr, sizeof(new_chain->addr), 0);
|
|
+#ifdef HAVE_DNSSEC
|
|
+ else if (flags & F_DNSKEY)
|
|
+ {
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&class, sizeof(class), 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.algo, sizeof(new_chain->addr.key.algo), 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.keytag, sizeof(new_chain->addr.key.keytag), 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.flags, sizeof(new_chain->addr.key.flags), 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.keylen, sizeof(new_chain->addr.key.keylen), 0);
|
|
+ blockdata_write(new_chain->addr.key.keydata, new_chain->addr.key.keylen, daemon->pipe_to_parent);
|
|
+ }
|
|
+ else if (flags & F_DS)
|
|
+ {
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&class, sizeof(class), 0);
|
|
+ /* A negative DS entry is possible and has no data, obviously. */
|
|
+ if (!(flags & F_NEG))
|
|
+ {
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.algo, sizeof(new_chain->addr.ds.algo), 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.keytag, sizeof(new_chain->addr.ds.keytag), 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.digest, sizeof(new_chain->addr.ds.digest), 0);
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.keylen, sizeof(new_chain->addr.ds.keylen), 0);
|
|
+ blockdata_write(new_chain->addr.ds.keydata, new_chain->addr.ds.keylen, daemon->pipe_to_parent);
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ }
|
|
}
|
|
+
|
|
new_chain = tmp;
|
|
}
|
|
+
|
|
+ /* signal end of cache insert in master process */
|
|
+ if (daemon->pipe_to_parent != -1)
|
|
+ {
|
|
+ ssize_t m = -1;
|
|
+ read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0);
|
|
+ }
|
|
+
|
|
new_chain = NULL;
|
|
}
|
|
|
|
+
|
|
+/* A marshalled cache entry arrives on fd, read, unmarshall and insert into cache of master process. */
|
|
+int cache_recv_insert(time_t now, int fd)
|
|
+{
|
|
+ ssize_t m;
|
|
+ struct all_addr addr;
|
|
+ unsigned long ttl;
|
|
+ time_t ttd;
|
|
+ unsigned short flags;
|
|
+ struct crec *crecp = NULL;
|
|
+
|
|
+ cache_start_insert();
|
|
+
|
|
+ while(1)
|
|
+ {
|
|
+
|
|
+ if (!read_write(fd, (unsigned char *)&m, sizeof(m), 1))
|
|
+ return 0;
|
|
+
|
|
+ if (m == -1)
|
|
+ {
|
|
+ cache_end_insert();
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ if (!read_write(fd, (unsigned char *)daemon->namebuff, m, 1) ||
|
|
+ !read_write(fd, (unsigned char *)&ttd, sizeof(ttd), 1) ||
|
|
+ !read_write(fd, (unsigned char *)&flags, sizeof(flags), 1))
|
|
+ return 0;
|
|
+
|
|
+ daemon->namebuff[m] = 0;
|
|
+
|
|
+ ttl = difftime(ttd, now);
|
|
+
|
|
+ if (flags & (F_IPV4 | F_IPV6))
|
|
+ {
|
|
+ if (!read_write(fd, (unsigned char *)&addr, sizeof(addr), 1))
|
|
+ return 0;
|
|
+ crecp = really_insert(daemon->namebuff, &addr, now, ttl, flags);
|
|
+ }
|
|
+ else if (flags & F_CNAME)
|
|
+ {
|
|
+ struct crec *newc = really_insert(daemon->namebuff, NULL, now, ttl, flags);
|
|
+ /* This relies on the fact that the target of a CNAME immediately precedes
|
|
+ it because of the order of extraction in extract_addresses, and
|
|
+ the order reversal on the new_chain. */
|
|
+ if (newc)
|
|
+ {
|
|
+ if (!crecp)
|
|
+ {
|
|
+ newc->addr.cname.target.cache = NULL;
|
|
+ /* anything other than zero, to avoid being mistaken for CNAME to interface-name */
|
|
+ newc->addr.cname.uid = 1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ next_uid(crecp);
|
|
+ newc->addr.cname.target.cache = crecp;
|
|
+ newc->addr.cname.uid = crecp->uid;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+#ifdef HAVE_DNSSEC
|
|
+ else if (flags & (F_DNSKEY | F_DS))
|
|
+ {
|
|
+ unsigned short class, keylen, keyflags, keytag;
|
|
+ unsigned char algo, digest;
|
|
+ struct blockdata *keydata;
|
|
+
|
|
+ if (!read_write(fd, (unsigned char *)&class, sizeof(class), 1))
|
|
+ return 0;
|
|
+ /* Cache needs to know the class for DNSSEC stuff */
|
|
+ addr.addr.dnssec.class = class;
|
|
+
|
|
+ crecp = really_insert(daemon->namebuff, &addr, now, ttl, flags);
|
|
+
|
|
+ if (flags & F_DNSKEY)
|
|
+ {
|
|
+ if (!read_write(fd, (unsigned char *)&algo, sizeof(algo), 1) ||
|
|
+ !read_write(fd, (unsigned char *)&keytag, sizeof(keytag), 1) ||
|
|
+ !read_write(fd, (unsigned char *)&keyflags, sizeof(keyflags), 1) ||
|
|
+ !read_write(fd, (unsigned char *)&keylen, sizeof(keylen), 1) ||
|
|
+ !(keydata = blockdata_read(fd, keylen)))
|
|
+ return 0;
|
|
+ }
|
|
+ else if (!(flags & F_NEG))
|
|
+ {
|
|
+ if (!read_write(fd, (unsigned char *)&algo, sizeof(algo), 1) ||
|
|
+ !read_write(fd, (unsigned char *)&keytag, sizeof(keytag), 1) ||
|
|
+ !read_write(fd, (unsigned char *)&digest, sizeof(digest), 1) ||
|
|
+ !read_write(fd, (unsigned char *)&keylen, sizeof(keylen), 1) ||
|
|
+ !(keydata = blockdata_read(fd, keylen)))
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ if (crecp)
|
|
+ {
|
|
+ if (flags & F_DNSKEY)
|
|
+ {
|
|
+ crecp->addr.key.algo = algo;
|
|
+ crecp->addr.key.keytag = keytag;
|
|
+ crecp->addr.key.flags = flags;
|
|
+ crecp->addr.key.keylen = keylen;
|
|
+ crecp->addr.key.keydata = keydata;
|
|
+ }
|
|
+ else if (!(flags & F_NEG))
|
|
+ {
|
|
+ crecp->addr.ds.algo = algo;
|
|
+ crecp->addr.ds.keytag = keytag;
|
|
+ crecp->addr.ds.digest = digest;
|
|
+ crecp->addr.ds.keylen = keylen;
|
|
+ crecp->addr.ds.keydata = keydata;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+ }
|
|
+}
|
|
+
|
|
int cache_find_non_terminal(char *name, time_t now)
|
|
{
|
|
struct crec *crecp;
|
|
--- a/src/dnsmasq.c
|
|
+++ b/src/dnsmasq.c
|
|
@@ -930,6 +930,10 @@ int main (int argc, char **argv)
|
|
check_servers();
|
|
|
|
pid = getpid();
|
|
+
|
|
+ daemon->pipe_to_parent = -1;
|
|
+ for (i = 0; i < MAX_PROCS; i++)
|
|
+ daemon->tcp_pipes[i] = -1;
|
|
|
|
#ifdef HAVE_INOTIFY
|
|
/* Using inotify, have to select a resolv file at startup */
|
|
@@ -1611,7 +1615,7 @@ static int set_dns_listeners(time_t now)
|
|
we don't need to explicitly arrange to wake up here */
|
|
if (listener->tcpfd != -1)
|
|
for (i = 0; i < MAX_PROCS; i++)
|
|
- if (daemon->tcp_pids[i] == 0)
|
|
+ if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1)
|
|
{
|
|
poll_listen(listener->tcpfd, POLLIN);
|
|
break;
|
|
@@ -1624,6 +1628,13 @@ static int set_dns_listeners(time_t now)
|
|
|
|
}
|
|
|
|
+#ifndef NO_FORK
|
|
+ if (!option_bool(OPT_DEBUG))
|
|
+ for (i = 0; i < MAX_PROCS; i++)
|
|
+ if (daemon->tcp_pipes[i] != -1)
|
|
+ poll_listen(daemon->tcp_pipes[i], POLLIN);
|
|
+#endif
|
|
+
|
|
return wait;
|
|
}
|
|
|
|
@@ -1632,7 +1643,10 @@ static void check_dns_listeners(time_t n
|
|
struct serverfd *serverfdp;
|
|
struct listener *listener;
|
|
int i;
|
|
-
|
|
+#ifndef NO_FORK
|
|
+ int pipefd[2];
|
|
+#endif
|
|
+
|
|
for (serverfdp = daemon->sfds; serverfdp; serverfdp = serverfdp->next)
|
|
if (poll_check(serverfdp->fd, POLLIN))
|
|
reply_query(serverfdp->fd, serverfdp->source_addr.sa.sa_family, now);
|
|
@@ -1642,7 +1656,26 @@ static void check_dns_listeners(time_t n
|
|
if (daemon->randomsocks[i].refcount != 0 &&
|
|
poll_check(daemon->randomsocks[i].fd, POLLIN))
|
|
reply_query(daemon->randomsocks[i].fd, daemon->randomsocks[i].family, now);
|
|
-
|
|
+
|
|
+#ifndef NO_FORK
|
|
+ /* Races. The child process can die before we read all of the data from the
|
|
+ pipe, or vice versa. Therefore send tcp_pids to zero when we wait() the
|
|
+ process, and tcp_pipes to -1 and close the FD when we read the last
|
|
+ of the data - indicated by cache_recv_insert returning zero.
|
|
+ The order of these events is indeterminate, and both are needed
|
|
+ to free the process slot. Once the child process has gone, poll()
|
|
+ returns POLLHUP, not POLLIN, so have to check for both here. */
|
|
+ if (!option_bool(OPT_DEBUG))
|
|
+ for (i = 0; i < MAX_PROCS; i++)
|
|
+ if (daemon->tcp_pipes[i] != -1 &&
|
|
+ poll_check(daemon->tcp_pipes[i], POLLIN | POLLHUP) &&
|
|
+ !cache_recv_insert(now, daemon->tcp_pipes[i]))
|
|
+ {
|
|
+ close(daemon->tcp_pipes[i]);
|
|
+ daemon->tcp_pipes[i] = -1;
|
|
+ }
|
|
+#endif
|
|
+
|
|
for (listener = daemon->listeners; listener; listener = listener->next)
|
|
{
|
|
if (listener->fd != -1 && poll_check(listener->fd, POLLIN))
|
|
@@ -1736,15 +1769,20 @@ static void check_dns_listeners(time_t n
|
|
while (retry_send(close(confd)));
|
|
}
|
|
#ifndef NO_FORK
|
|
- else if (!option_bool(OPT_DEBUG) && (p = fork()) != 0)
|
|
+ else if (!option_bool(OPT_DEBUG) && pipe(pipefd) == 0 && (p = fork()) != 0)
|
|
{
|
|
- if (p != -1)
|
|
+ close(pipefd[1]); /* parent needs read pipe end. */
|
|
+ if (p == -1)
|
|
+ close(pipefd[0]);
|
|
+ else
|
|
{
|
|
int i;
|
|
+
|
|
for (i = 0; i < MAX_PROCS; i++)
|
|
- if (daemon->tcp_pids[i] == 0)
|
|
+ if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1)
|
|
{
|
|
daemon->tcp_pids[i] = p;
|
|
+ daemon->tcp_pipes[i] = pipefd[0];
|
|
break;
|
|
}
|
|
}
|
|
@@ -1761,7 +1799,7 @@ static void check_dns_listeners(time_t n
|
|
int flags;
|
|
struct in_addr netmask;
|
|
int auth_dns;
|
|
-
|
|
+
|
|
if (iface)
|
|
{
|
|
netmask = iface->netmask;
|
|
@@ -1777,7 +1815,11 @@ static void check_dns_listeners(time_t n
|
|
/* Arrange for SIGALRM after CHILD_LIFETIME seconds to
|
|
terminate the process. */
|
|
if (!option_bool(OPT_DEBUG))
|
|
- alarm(CHILD_LIFETIME);
|
|
+ {
|
|
+ alarm(CHILD_LIFETIME);
|
|
+ close(pipefd[0]); /* close read end in child. */
|
|
+ daemon->pipe_to_parent = pipefd[1];
|
|
+ }
|
|
#endif
|
|
|
|
/* start with no upstream connections. */
|
|
--- a/src/dnsmasq.h
|
|
+++ b/src/dnsmasq.h
|
|
@@ -1091,6 +1091,8 @@ extern struct daemon {
|
|
size_t packet_len; /* " " */
|
|
struct randfd *rfd_save; /* " " */
|
|
pid_t tcp_pids[MAX_PROCS];
|
|
+ int tcp_pipes[MAX_PROCS];
|
|
+ int pipe_to_parent;
|
|
struct randfd randomsocks[RANDOM_SOCKS];
|
|
int v6pktinfo;
|
|
struct addrlist *interface_addrs; /* list of all addresses/prefix lengths associated with all local interfaces */
|
|
@@ -1152,6 +1154,7 @@ struct crec *cache_find_by_name(struct c
|
|
char *name, time_t now, unsigned int prot);
|
|
void cache_end_insert(void);
|
|
void cache_start_insert(void);
|
|
+int cache_recv_insert(time_t now, int fd);
|
|
struct crec *cache_insert(char *name, struct all_addr *addr,
|
|
time_t now, unsigned long ttl, unsigned short flags);
|
|
void cache_reload(void);
|
|
@@ -1174,6 +1177,8 @@ void blockdata_init(void);
|
|
void blockdata_report(void);
|
|
struct blockdata *blockdata_alloc(char *data, size_t len);
|
|
void *blockdata_retrieve(struct blockdata *block, size_t len, void *data);
|
|
+struct blockdata *blockdata_read(int fd, size_t len);
|
|
+void blockdata_write(struct blockdata *block, size_t len, int fd);
|
|
void blockdata_free(struct blockdata *blocks);
|
|
#endif
|
|
|
|
|