loss of masters over ipsec hoses bind

Mark Andrews Mark_Andrews at isc.org
Mon Dec 24 01:56:49 UTC 2007


> 
> > I'm currently running Bind 9.4.1 (Ubuntu Gutsy).  I have several zones
> > in master->slave setups, which normally works just fine.  The other
> > day, however, I ran into an odd problem.  A couple of the slave zones
> > generally update over an ipsec connected network.  The ipsec
> > connection went away, and shortly thereafter bind royally wedged
> > itself, refusing to serve any data (including basic forward lookups)
> > and was not even responding to rndc restarts.  It took me a good while
> > of restarting the system and poking around logs to decide to strace
> > the process, which eventually lead me to removing the ipsec-dependant
> > slave zones from the config.  As soon as I did this, Bind became
> > stable again.  Interestingly, zones which updated over public IP space
> > behaved fine, even if the master server was unreachable.  It was only
> > zones that were trying to go over the down ipsec connection that hosed
> > the daemon.
> > 
> > This whole issue is logged in a bit more detail here, including output
> > from strace:
> > https://bugs.launchpad.net/ubuntu/+source/bind/+bug/177489
> > 
> > I can (apparently) reproduce this issue again with little difficulty,
> > so I'd be glad to help debug it.
> > 
> > -
> > Matt LaPlante
> 
> 	I would say that some I/O is blocking when it shouldn't
> 	with sockets which use ipsec.  If this is the case it is
> 	a kernel bug and named can't do anything to prevent it.
> 	Named marks all sockets as non-blocking.
> 
> 	Mark

	We should be able to prove / disprove the theory above with
	this patch.  It sets a 2 second alarm for all socket I/O.
	You need to build named with threads disabled and run it with
	"named -g <rest of named's arguments>".

	If a socket call takes longer than 2 seconds you will get
	"ALARM: <call>" (e.g. "ALARM: recvmsg") printed to stderr.

	Mark

Index: lib/isc/unix/socket.c
===================================================================
RCS file: /proj/cvs/prod/bind9/lib/isc/unix/socket.c,v
retrieving revision 1.275
diff -u -r1.275 socket.c
--- lib/isc/unix/socket.c	14 Dec 2007 03:52:40 -0000	1.275
+++ lib/isc/unix/socket.c	24 Dec 2007 01:27:38 -0000
@@ -281,6 +281,18 @@
 
 #define SOCK_DEAD(s)			((s)->references == 0)
 
+#ifndef ISC_PLATFORM_USETHREADS
+static const char *alarm_msg = "ALARM: none\n";
+static void alarm_handler(int foo) {
+
+	UNUSED(foo);
+	/*
+	 * Write message to stderr.
+	 */
+	write(2, alarm_msg, strlen(alarm_msg));
+}
+#endif
+
 static void
 manager_log(isc_socketmgr_t *sockmgr,
 	    isc_logcategory_t *category, isc_logmodule_t *module, int level,
@@ -985,8 +997,16 @@
 	dump_msg(&msghdr);
 #endif
 
+#ifndef ISC_PLATFORM_USETHREADS
+	alarm(2);
+	alarm_msg = "ALARM: recvmsg\n";
+#endif
 	cc = recvmsg(sock->fd, &msghdr, 0);
 	recv_errno = errno;
+#ifndef ISC_PLATFORM_USETHREADS
+	alarm(0);
+	alarm_msg = "ALARM: none\n";
+#endif
 
 #if defined(ISC_SOCKET_DEBUG)
 	dump_msg(&msghdr);
@@ -1142,8 +1162,16 @@
 	build_msghdr_send(sock, dev, &msghdr, iov, &write_count);
 
  resend:
+#ifndef ISC_PLATFORM_USETHREADS
+	alarm(2);
+	alarm_msg = "ALARM: sendmsg\n";
+#endif
 	cc = sendmsg(sock->fd, &msghdr, 0);
 	send_errno = errno;
+#ifndef ISC_PLATFORM_USETHREADS
+	alarm(0);
+	alarm_msg = "ALARM: none\n";
+#endif
 
 	/*
 	 * Check for error or block condition.
@@ -2623,6 +2651,7 @@
 	REQUIRE(managerp != NULL && *managerp == NULL);
 
 #ifndef ISC_PLATFORM_USETHREADS
+	signal(SIGALRM, alarm_handler);
 	if (socketmgr != NULL) {
 		socketmgr->refs++;
 		*managerp = socketmgr;
@@ -3281,6 +3310,10 @@
 		goto cleanup;
 	}
 
+#ifndef ISC_PLATFORM_USETHREADS
+	alarm(2);
+	alarm_msg = "ALARM: connect unix\n";
+#endif
 	if (connect(s, (struct sockaddr *)&sockaddr->type.sunix,
 		    sizeof(sockaddr->type.sunix)) < 0) {
 		switch (errno) {
@@ -3306,6 +3339,10 @@
 			break;
 		}
 	}
+#ifndef ISC_PLATFORM_USETHREADS
+	alarm(0);
+	alarm_msg = "ALARM: none\n";
+#endif
  cleanup:
 	close(s);
 #else
@@ -3573,7 +3610,7 @@
 	isc_socket_connev_t *dev;
 	isc_task_t *ntask = NULL;
 	isc_socketmgr_t *manager;
-	int cc;
+	int cc, connect_errno;
 	char strbuf[ISC_STRERRORSIZE];
 
 	REQUIRE(VALID_SOCKET(sock));
@@ -3607,12 +3644,21 @@
 	 * outstanding, and it might happen to complete.
 	 */
 	sock->peer_address = *addr;
+#ifndef ISC_PLATFORM_USETHREADS
+	alarm(2);
+	alarm_msg = "ALARM: connect\n";
+#endif
 	cc = connect(sock->fd, &addr->type.sa, addr->length);
+	connect_errno = errno;
+#ifndef ISC_PLATFORM_USETHREADS
+	alarm(0);
+	alarm_msg = "ALARM: none\n";
+#endif
 	if (cc < 0) {
-		if (SOFT_ERROR(errno) || errno == EINPROGRESS)
+		if (SOFT_ERROR(connect_errno) || connect_errno == EINPROGRESS)
 			goto queue;
 
-		switch (errno) {
+		switch (connect_errno) {
 #define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
 			ERROR_MATCH(EACCES, ISC_R_NOPERM);
 			ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
@@ -3632,8 +3678,9 @@
 
 		sock->connected = 0;
 
-		isc__strerror(errno, strbuf, sizeof(strbuf));
-		UNEXPECTED_ERROR(__FILE__, __LINE__, "%d/%s", errno, strbuf);
+		isc__strerror(connect_errno, strbuf, sizeof(strbuf));
+		UNEXPECTED_ERROR(__FILE__, __LINE__, "%d/%s", connect_errno,
+				 strbuf);
 
 		UNLOCK(&sock->lock);
 		isc_event_free(ISC_EVENT_PTR(&dev));
-- 
Mark Andrews, ISC
1 Seymour St., Dundas Valley, NSW 2117, Australia
PHONE: +61 2 9871 4742                 INTERNET: Mark_Andrews at isc.org



More information about the bind-users mailing list