loss of masters over ipsec hoses bind
Mark Andrews
Mark_Andrews at isc.org
Mon Dec 24 01:56:49 UTC 2007
>
> > I'm currently running Bind 9.4.1 (Ubuntu Gutsy). I have several zones
> > in master->slave setups, which normally works just fine. The other
> > day, however, I ran into an odd problem. A couple of the slave zones
> > generally update over an ipsec connected network. The ipsec
> > connection went away, and shortly thereafter bind royally wedged
> > itself, refusing to serve any data (including basic forward lookups)
> > and was not even responding to rndc restarts. It took me a good while
> > of restarting the system and poking around logs to decide to strace
> > the process, which eventually led me to removing the ipsec-dependent
> > slave zones from the config. As soon as I did this, Bind became
> > stable again. Interestingly, zones which updated over public IP space
> > behaved fine, even if the master server was unreachable. It was only
> > zones that were trying to go over the down ipsec connection that hosed
> > the daemon.
> >
> > This whole issue is logged in a bit more detail here, including output
> > from strace:
> > https://bugs.launchpad.net/ubuntu/+source/bind/+bug/177489
> >
> > I can (apparently) reproduce this issue again with little difficulty,
> > so I'd be glad to help debug it.
> >
> > -
> > Matt LaPlante
>
> I would say that some I/O is blocking when it shouldn't
> with sockets which use ipsec. If this is the case it is
> a kernel bug and named can't do anything to prevent it.
> Named marks all sockets as non-blocking.
>
> Mark
We should be able to prove / disprove the theory above with
this patch. It sets a 2 second alarm for all socket I/O.
You need to build named with threads disabled and run it with
"named -g <rest of named's arguments>".
If a socket call blocks for more than 2 seconds, you will get an
"ALARM: <call>" message (e.g. "ALARM: recvmsg") printed to stderr.
Mark
Index: lib/isc/unix/socket.c
===================================================================
RCS file: /proj/cvs/prod/bind9/lib/isc/unix/socket.c,v
retrieving revision 1.275
diff -u -r1.275 socket.c
--- lib/isc/unix/socket.c 14 Dec 2007 03:52:40 -0000 1.275
+++ lib/isc/unix/socket.c 24 Dec 2007 01:27:38 -0000
@@ -281,6 +281,18 @@
#define SOCK_DEAD(s) ((s)->references == 0)
+#ifndef ISC_PLATFORM_USETHREADS
+static const char *alarm_msg = "ALARM: none\n";
+static void alarm_handler(int foo) {
+
+ UNUSED(foo);
+ /*
+ * Write message to stderr.
+ */
+ write(2, alarm_msg, strlen(alarm_msg));
+}
+#endif
+
static void
manager_log(isc_socketmgr_t *sockmgr,
isc_logcategory_t *category, isc_logmodule_t *module, int level,
@@ -985,8 +997,16 @@
dump_msg(&msghdr);
#endif
+#ifndef ISC_PLATFORM_USETHREADS
+ alarm(2);
+ alarm_msg = "ALARM: recvmsg\n";
+#endif
cc = recvmsg(sock->fd, &msghdr, 0);
recv_errno = errno;
+#ifndef ISC_PLATFORM_USETHREADS
+ alarm(0);
+ alarm_msg = "ALARM: none\n";
+#endif
#if defined(ISC_SOCKET_DEBUG)
dump_msg(&msghdr);
@@ -1142,8 +1162,16 @@
build_msghdr_send(sock, dev, &msghdr, iov, &write_count);
resend:
+#ifndef ISC_PLATFORM_USETHREADS
+ alarm(2);
+ alarm_msg = "ALARM: sendmsg\n";
+#endif
cc = sendmsg(sock->fd, &msghdr, 0);
send_errno = errno;
+#ifndef ISC_PLATFORM_USETHREADS
+ alarm(0);
+ alarm_msg = "ALARM: none\n";
+#endif
/*
* Check for error or block condition.
@@ -2623,6 +2651,7 @@
REQUIRE(managerp != NULL && *managerp == NULL);
#ifndef ISC_PLATFORM_USETHREADS
+ signal(SIGALRM, alarm_handler);
if (socketmgr != NULL) {
socketmgr->refs++;
*managerp = socketmgr;
@@ -3281,6 +3310,10 @@
goto cleanup;
}
+#ifndef ISC_PLATFORM_USETHREADS
+ alarm(2);
+ alarm_msg = "ALARM: connect unix\n";
+#endif
if (connect(s, (struct sockaddr *)&sockaddr->type.sunix,
sizeof(sockaddr->type.sunix)) < 0) {
switch (errno) {
@@ -3306,6 +3339,10 @@
break;
}
}
+#ifndef ISC_PLATFORM_USETHREADS
+ alarm(0);
+ alarm_msg = "ALARM: none\n";
+#endif
cleanup:
close(s);
#else
@@ -3573,7 +3610,7 @@
isc_socket_connev_t *dev;
isc_task_t *ntask = NULL;
isc_socketmgr_t *manager;
- int cc;
+ int cc, connect_errno;
char strbuf[ISC_STRERRORSIZE];
REQUIRE(VALID_SOCKET(sock));
@@ -3607,12 +3644,21 @@
* outstanding, and it might happen to complete.
*/
sock->peer_address = *addr;
+#ifndef ISC_PLATFORM_USETHREADS
+ alarm(2);
+ alarm_msg = "ALARM: connect\n";
+#endif
cc = connect(sock->fd, &addr->type.sa, addr->length);
+ connect_errno = errno;
+#ifndef ISC_PLATFORM_USETHREADS
+ alarm(0);
+ alarm_msg = "ALARM: none\n";
+#endif
if (cc < 0) {
- if (SOFT_ERROR(errno) || errno == EINPROGRESS)
+ if (SOFT_ERROR(connect_errno) || connect_errno == EINPROGRESS)
goto queue;
- switch (errno) {
+ switch (connect_errno) {
#define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
ERROR_MATCH(EACCES, ISC_R_NOPERM);
ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
@@ -3632,8 +3678,9 @@
sock->connected = 0;
- isc__strerror(errno, strbuf, sizeof(strbuf));
- UNEXPECTED_ERROR(__FILE__, __LINE__, "%d/%s", errno, strbuf);
+ isc__strerror(connect_errno, strbuf, sizeof(strbuf));
+ UNEXPECTED_ERROR(__FILE__, __LINE__, "%d/%s", connect_errno,
+ strbuf);
UNLOCK(&sock->lock);
isc_event_free(ISC_EVENT_PTR(&dev));
--
Mark Andrews, ISC
1 Seymour St., Dundas Valley, NSW 2117, Australia
PHONE: +61 2 9871 4742 INTERNET: Mark_Andrews at isc.org
More information about the bind-users
mailing list